From 4c3488436d5d0d723502b8acb0d0f8e72819ca43 Mon Sep 17 00:00:00 2001 From: Stephane Geneix Date: Tue, 12 Nov 2024 11:55:15 -0800 Subject: [PATCH] source-mssql: convert to bulk CDK --- .../read/cdc/CdcPartitionsCreatorFactory.kt | 1 + .../cdk/discover/JdbcAirbyteStreamFactory.kt | 1 + .../cdk/read/JdbcPartitionsCreatorFactory.kt | 1 + .../connectors/source-mssql-v1/README.md | 33 + .../acceptance-test-config.yml | 9 + .../connectors/source-mssql-v1/build.gradle | 41 + .../source-mssql-v1/gradle.properties | 2 + .../connectors/source-mssql-v1/icon.svg | 1 + .../integration_tests/acceptance.py | 16 + .../integration_tests/seed/basic.sql | 228 ++++ .../integration_tests/seed/full.sql | 320 +++++ .../seed/full_without_nulls.sql | 380 ++++++ .../connectors/source-mssql-v1/metadata.yaml | 66 ++ .../source/mssql/MsSqlSpecConstants.java | 0 .../MssqlCdcConnectorMetadataInjector.java | 0 .../source/mssql/MssqlCdcHelper.java | 0 .../mssql/MssqlCdcSavedInfoFetcher.java | 0 .../source/mssql/MssqlCdcStateHandler.java | 0 .../source/mssql/MssqlCdcTargetPosition.java | 0 .../source/mssql/MssqlDebeziumConverter.java | 0 .../source/mssql/MssqlQueryUtils.java | 0 .../source/mssql/MssqlSource.java | 0 .../source/mssql/MssqlSourceOperations.java | 0 .../mssql/cdc/MssqlCdcStateConstants.java | 0 .../mssql/cdc/MssqlDebeziumStateUtil.java | 0 .../MssqlCursorBasedStateManager.java | 0 .../initialsync/CdcMetadataInjector.java | 0 .../MssqlInitialLoadGlobalStateManager.java | 0 .../initialsync/MssqlInitialLoadHandler.java | 0 .../MssqlInitialLoadRecordIterator.java | 0 .../MssqlInitialLoadStateManager.java | 0 .../MssqlInitialLoadStreamStateManager.java | 0 .../initialsync/MssqlInitialReadUtil.java | 0 .../mssql/MSSqlSourceExceptionHandler.kt | 0 .../src/main/resources/spec.json | 0 .../AbstractMssqlSourceDatatypeTest.java | 0 .../AbstractSshMssqlSourceAcceptanceTest.java | 0 .../mssql/CdcMssqlSourceAcceptanceTest.java | 0 .../mssql/CdcMssqlSourceDatatypeTest.java | 0 
...ntSslEnabledMssqlSourceAcceptanceTest.java | 0 .../mssql/MssqlSourceAcceptanceTest.java | 0 .../source/mssql/MssqlSourceDatatypeTest.java | 0 .../mssql/MssqlSourceOperationsTest.java | 0 .../SshKeyMssqlSourceAcceptanceTest.java | 0 .../SshPasswordMssqlSourceAcceptanceTest.java | 0 .../SslEnabledMssqlSourceAcceptanceTest.java | 0 .../resources/dummy_config.json | 0 .../resources/expected_spec.json | 0 .../mssql/FillMsSqlTestDbScriptTest.java | 0 .../mssql/MssqlSourcePerformanceTest.java | 0 .../sql/create_mssql_benchmarks.sql | 0 .../source/mssql/CdcMssqlSourceTest.java | 0 .../source/mssql/CdcMssqlSslSourceTest.java | 0 .../source/mssql/CdcStateCompressionTest.java | 0 .../mssql/CloudDeploymentMssqlTest.java | 0 .../source/mssql/MssqlAgentStateTest.java | 0 .../source/mssql/MssqlCdcHelperTest.java | 0 .../mssql/MssqlDataSourceFactoryTest.java | 0 .../mssql/MssqlDebeziumStateUtilTest.java | 0 .../mssql/MssqlInitialLoadHandlerTest.java | 0 .../mssql/MssqlJdbcSourceAcceptanceTest.java | 0 .../source/mssql/MssqlSourceTest.java | 0 .../source/mssql/MssqlSslSourceTest.java | 0 .../source/mssql/MssqlStressTest.java | 0 .../source/mssql/MsSQLContainerFactory.java | 0 .../source/mssql/MsSQLTestDatabase.java | 0 ...sSqlTestDatabaseWithBackgroundThreads.java | 0 .../connectors/source-mssql/build.gradle | 28 +- .../connectors/source-mssql/metadata.yaml | 12 - .../mssql/MSSqlSourceExceptionHandler.kt | 33 + .../source/mssql/MsSqlSpecConstants.java | 15 + .../MssqlCdcConnectorMetadataInjector.java | 75 ++ .../source/mssql/MssqlCdcHelper.java | 141 +++ .../mssql/MssqlCdcSavedInfoFetcher.java | 41 + .../source/mssql/MssqlCdcStateHandler.java | 76 ++ .../source/mssql/MssqlCdcTargetPosition.java | 144 +++ .../source/mssql/MssqlDebeziumConverter.java | 215 ++++ .../source/mssql/MssqlQueryUtils.java | 303 +++++ .../source/mssql/MssqlSource.java | 652 +++++++++++ .../source/mssql/MssqlSourceOperations.java | 199 ++++ .../mssql/cdc/MssqlCdcStateConstants.java | 13 + 
.../mssql/cdc/MssqlDebeziumStateUtil.java | 305 +++++ .../MssqlCursorBasedStateManager.java | 88 ++ .../initialsync/CdcMetadataInjector.java | 29 + .../MssqlInitialLoadGlobalStateManager.java | 173 +++ .../initialsync/MssqlInitialLoadHandler.java | 268 +++++ .../MssqlInitialLoadRecordIterator.java | 200 ++++ .../MssqlInitialLoadStateManager.java | 112 ++ .../MssqlInitialLoadStreamStateManager.java | 75 ++ .../initialsync/MssqlInitialReadUtil.java | 558 +++++++++ .../src.bak/main/resources/spec.json | 188 +++ .../AbstractMssqlSourceDatatypeTest.java | 359 ++++++ .../AbstractSshMssqlSourceAcceptanceTest.java | 152 +++ .../mssql/CdcMssqlSourceAcceptanceTest.java | 247 ++++ .../mssql/CdcMssqlSourceDatatypeTest.java | 79 ++ ...ntSslEnabledMssqlSourceAcceptanceTest.java | 48 + .../mssql/MssqlSourceAcceptanceTest.java | 215 ++++ .../source/mssql/MssqlSourceDatatypeTest.java | 31 + .../mssql/MssqlSourceOperationsTest.java | 94 ++ .../SshKeyMssqlSourceAcceptanceTest.java | 16 + .../SshPasswordMssqlSourceAcceptanceTest.java | 16 + .../SslEnabledMssqlSourceAcceptanceTest.java | 41 + .../resources/dummy_config.json | 7 + .../resources/expected_spec.json | 305 +++++ .../mssql/FillMsSqlTestDbScriptTest.java | 82 ++ .../mssql/MssqlSourcePerformanceTest.java | 55 + .../sql/create_mssql_benchmarks.sql | 305 +++++ .../source/mssql/CdcMssqlSourceTest.java | 686 +++++++++++ .../source/mssql/CdcMssqlSslSourceTest.java | 71 ++ .../source/mssql/CdcStateCompressionTest.java | 266 +++++ .../mssql/CloudDeploymentMssqlTest.java | 121 ++ .../source/mssql/MssqlAgentStateTest.java | 121 ++ .../source/mssql/MssqlCdcHelperTest.java | 50 + .../mssql/MssqlDataSourceFactoryTest.java | 35 + .../mssql/MssqlDebeziumStateUtilTest.java | 29 + .../mssql/MssqlInitialLoadHandlerTest.java | 36 + .../mssql/MssqlJdbcSourceAcceptanceTest.java | 482 ++++++++ .../source/mssql/MssqlSourceTest.java | 148 +++ .../source/mssql/MssqlSslSourceTest.java | 122 ++ .../source/mssql/MssqlStressTest.java | 46 + 
.../source/mssql/MsSQLContainerFactory.java | 66 ++ .../source/mssql/MsSQLTestDatabase.java | 428 +++++++ ...sSqlTestDatabaseWithBackgroundThreads.java | 306 +++++ .../source/mssql/MsSqlJdbcPartitionFactory.kt | 356 ++++++ ...MsSqlServerCdcInitialSnapshotStateValue.kt | 50 + .../mssql/MsSqlServerDebeziumOperations.kt | 47 + .../mssql/MsSqlServerFieldTypeMapper.kt | 108 ++ .../source/mssql/MsSqlServerJdbcPartition.kt | 351 ++++++ .../mssql/MsSqlServerJdbcStreamStateValue.kt | 92 ++ .../mssql/MsSqlServerSelectQueryGenerator.kt | 111 ++ .../source/mssql/MsSqlServerSource.kt | 18 + .../mssql/MsSqlServerSourceConfiguration.kt | 96 ++ .../source/mssql/MsSqlServerStreamFactory.kt | 21 + ...verEncryptionConfigurationSpecification.kt | 73 ++ ...icationMethodConfigurationSpecification.kt | 91 ++ ...lServerSourceConfigurationSpecification.kt | 119 ++ .../src/main/resources/application.yml | 12 + .../mssql/MsSqlServerContainerFactory.kt | 123 ++ .../MsSqlServerCursorBasedIntegrationTest.kt | 172 +++ ...verSourceConfigurationSpecificationTest.kt | 72 ++ .../MsSqlServerSourceConfigurationTest.kt | 150 +++ ...sSqlServerSourceDatatypeIntegrationTest.kt | 467 ++++++++ ...SqlServerSourceSelectQueryGeneratorTest.kt | 141 +++ .../mssql/MsSqlServerSpecIntegrationTest.kt | 190 +++ .../mssql/MysqlCdcDatatypeIntegrationTest.kt | 467 ++++++++ .../source/mssql/MysqlCdcIntegrationTest.kt | 173 +++ .../mssql/MysqlJdbcPartitionFactoryTest.kt | 317 +++++ .../MysqlSourceTestConfigurationFactory.kt | 29 + .../test/resources/catalog-cdc-dbo-users.json | 125 ++ .../resources/catalog-cdc-single-stream.json | 44 + .../src/test/resources/catalog-cdc.json | 1033 +++++++++++++++++ .../container-license-acceptance.txt | 1 + .../src/test/resources/expected_spec.json | 203 ++++ .../MysqlSourceConfigurationSpecification.kt | 4 +- .../src/test/resources/expected-spec.json | 4 +- 155 files changed, 15333 insertions(+), 34 deletions(-) create mode 100644 
airbyte-integrations/connectors/source-mssql-v1/README.md create mode 100644 airbyte-integrations/connectors/source-mssql-v1/acceptance-test-config.yml create mode 100644 airbyte-integrations/connectors/source-mssql-v1/build.gradle create mode 100644 airbyte-integrations/connectors/source-mssql-v1/gradle.properties create mode 100644 airbyte-integrations/connectors/source-mssql-v1/icon.svg create mode 100644 airbyte-integrations/connectors/source-mssql-v1/integration_tests/acceptance.py create mode 100644 airbyte-integrations/connectors/source-mssql-v1/integration_tests/seed/basic.sql create mode 100644 airbyte-integrations/connectors/source-mssql-v1/integration_tests/seed/full.sql create mode 100644 airbyte-integrations/connectors/source-mssql-v1/integration_tests/seed/full_without_nulls.sql create mode 100644 airbyte-integrations/connectors/source-mssql-v1/metadata.yaml rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/MsSqlSpecConstants.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcConnectorMetadataInjector.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcHelper.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcSavedInfoFetcher.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcStateHandler.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcTargetPosition.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/MssqlDebeziumConverter.java (100%) 
rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/MssqlQueryUtils.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/MssqlSourceOperations.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/cdc/MssqlCdcStateConstants.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/cdc/MssqlDebeziumStateUtil.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/cursor_based/MssqlCursorBasedStateManager.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/initialsync/CdcMetadataInjector.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadGlobalStateManager.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadHandler.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadRecordIterator.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadStateManager.java (100%) rename airbyte-integrations/connectors/{source-mssql => 
source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadStreamStateManager.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialReadUtil.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/kotlin/io/airbyte/integrations/source/mssql/MSSqlSourceExceptionHandler.kt (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/resources/spec.json (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractMssqlSourceDatatypeTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractSshMssqlSourceAcceptanceTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceAcceptanceTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceDatatypeTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-integration/java/io/airbyte/integrations/source/mssql/CloudDeploymentSslEnabledMssqlSourceAcceptanceTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceAcceptanceTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceDatatypeTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceOperationsTest.java 
(100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-integration/java/io/airbyte/integrations/source/mssql/SshKeyMssqlSourceAcceptanceTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-integration/java/io/airbyte/integrations/source/mssql/SshPasswordMssqlSourceAcceptanceTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-integration/java/io/airbyte/integrations/source/mssql/SslEnabledMssqlSourceAcceptanceTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-integration/resources/dummy_config.json (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-integration/resources/expected_spec.json (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-performance/java/io/airbyte/integrations/source/mssql/FillMsSqlTestDbScriptTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-performance/java/io/airbyte/integrations/source/mssql/MssqlSourcePerformanceTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-performance/sql/create_mssql_benchmarks.sql (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSslSourceTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test/java/io/airbyte/integrations/source/mssql/CdcStateCompressionTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test/java/io/airbyte/integrations/source/mssql/CloudDeploymentMssqlTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => 
source-mssql-v1}/src/test/java/io/airbyte/integrations/source/mssql/MssqlAgentStateTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test/java/io/airbyte/integrations/source/mssql/MssqlCdcHelperTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test/java/io/airbyte/integrations/source/mssql/MssqlDataSourceFactoryTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test/java/io/airbyte/integrations/source/mssql/MssqlDebeziumStateUtilTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test/java/io/airbyte/integrations/source/mssql/MssqlInitialLoadHandlerTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test/java/io/airbyte/integrations/source/mssql/MssqlJdbcSourceAcceptanceTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test/java/io/airbyte/integrations/source/mssql/MssqlSourceTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test/java/io/airbyte/integrations/source/mssql/MssqlSslSourceTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test/java/io/airbyte/integrations/source/mssql/MssqlStressTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLContainerFactory.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLTestDatabase.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSqlTestDatabaseWithBackgroundThreads.java (100%) create mode 100644 
airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/MSSqlSourceExceptionHandler.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/MsSqlSpecConstants.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/MssqlCdcConnectorMetadataInjector.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/MssqlCdcHelper.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/MssqlCdcSavedInfoFetcher.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/MssqlCdcStateHandler.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/MssqlCdcTargetPosition.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/MssqlDebeziumConverter.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/MssqlQueryUtils.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/MssqlSourceOperations.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/cdc/MssqlCdcStateConstants.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/cdc/MssqlDebeziumStateUtil.java create mode 100644 
airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/cursor_based/MssqlCursorBasedStateManager.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/initialsync/CdcMetadataInjector.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadGlobalStateManager.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadHandler.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadRecordIterator.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadStateManager.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadStreamStateManager.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialReadUtil.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/main/resources/spec.json create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/AbstractMssqlSourceDatatypeTest.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/AbstractSshMssqlSourceAcceptanceTest.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceAcceptanceTest.java create mode 100644 
airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceDatatypeTest.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/CloudDeploymentSslEnabledMssqlSourceAcceptanceTest.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceAcceptanceTest.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceDatatypeTest.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceOperationsTest.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/SshKeyMssqlSourceAcceptanceTest.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/SshPasswordMssqlSourceAcceptanceTest.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/SslEnabledMssqlSourceAcceptanceTest.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/test-integration/resources/dummy_config.json create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/test-integration/resources/expected_spec.json create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/test-performance/java/io/airbyte/integrations/source/mssql/FillMsSqlTestDbScriptTest.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/test-performance/java/io/airbyte/integrations/source/mssql/MssqlSourcePerformanceTest.java create mode 100644 
airbyte-integrations/connectors/source-mssql/src.bak/test-performance/sql/create_mssql_benchmarks.sql create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSslSourceTest.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/CdcStateCompressionTest.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/CloudDeploymentMssqlTest.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlAgentStateTest.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlCdcHelperTest.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlDataSourceFactoryTest.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlDebeziumStateUtilTest.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlInitialLoadHandlerTest.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlJdbcSourceAcceptanceTest.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlSourceTest.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlSslSourceTest.java create mode 100644 
airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlStressTest.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLContainerFactory.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLTestDatabase.java create mode 100644 airbyte-integrations/connectors/source-mssql/src.bak/testFixtures/java/io/airbyte/integrations/source/mssql/MsSqlTestDatabaseWithBackgroundThreads.java create mode 100644 airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlJdbcPartitionFactory.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerCdcInitialSnapshotStateValue.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerDebeziumOperations.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerFieldTypeMapper.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerJdbcPartition.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerJdbcStreamStateValue.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSelectQueryGenerator.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSource.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceConfiguration.kt create mode 100644 
airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerStreamFactory.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/config_spec/MsSqlServerEncryptionConfigurationSpecification.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/config_spec/MsSqlServerReplicationMethodConfigurationSpecification.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/config_spec/MsSqlServerSourceConfigurationSpecification.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/main/resources/application.yml create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerContainerFactory.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerCursorBasedIntegrationTest.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceConfigurationSpecificationTest.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceConfigurationTest.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceDatatypeIntegrationTest.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceSelectQueryGeneratorTest.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSpecIntegrationTest.kt create mode 100644 
airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlCdcDatatypeIntegrationTest.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlCdcIntegrationTest.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlJdbcPartitionFactoryTest.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlSourceTestConfigurationFactory.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/resources/catalog-cdc-dbo-users.json create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/resources/catalog-cdc-single-stream.json create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/resources/catalog-cdc.json create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/resources/container-license-acceptance.txt create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/resources/expected_spec.json diff --git a/airbyte-cdk/bulk/toolkits/extract-cdc/src/main/kotlin/io/airbyte/cdk/read/cdc/CdcPartitionsCreatorFactory.kt b/airbyte-cdk/bulk/toolkits/extract-cdc/src/main/kotlin/io/airbyte/cdk/read/cdc/CdcPartitionsCreatorFactory.kt index 728bb637713ea..cb9dec3a128a5 100644 --- a/airbyte-cdk/bulk/toolkits/extract-cdc/src/main/kotlin/io/airbyte/cdk/read/cdc/CdcPartitionsCreatorFactory.kt +++ b/airbyte-cdk/bulk/toolkits/extract-cdc/src/main/kotlin/io/airbyte/cdk/read/cdc/CdcPartitionsCreatorFactory.kt @@ -9,6 +9,7 @@ import io.airbyte.cdk.read.FeedBootstrap import io.airbyte.cdk.read.GlobalFeedBootstrap import io.airbyte.cdk.read.PartitionsCreator import io.airbyte.cdk.read.PartitionsCreatorFactory +import io.github.oshai.kotlinlogging.KotlinLogging import io.micronaut.core.annotation.Order import jakarta.inject.Singleton import 
java.util.concurrent.atomic.AtomicReference diff --git a/airbyte-cdk/bulk/toolkits/extract-jdbc/src/main/kotlin/io/airbyte/cdk/discover/JdbcAirbyteStreamFactory.kt b/airbyte-cdk/bulk/toolkits/extract-jdbc/src/main/kotlin/io/airbyte/cdk/discover/JdbcAirbyteStreamFactory.kt index f702d3b506d91..7f71938a7c429 100644 --- a/airbyte-cdk/bulk/toolkits/extract-jdbc/src/main/kotlin/io/airbyte/cdk/discover/JdbcAirbyteStreamFactory.kt +++ b/airbyte-cdk/bulk/toolkits/extract-jdbc/src/main/kotlin/io/airbyte/cdk/discover/JdbcAirbyteStreamFactory.kt @@ -8,6 +8,7 @@ import io.airbyte.cdk.jdbc.JsonStringFieldType import io.airbyte.cdk.jdbc.NCharacterStreamFieldType import io.airbyte.cdk.jdbc.NClobFieldType import io.airbyte.protocol.models.v0.SyncMode +import io.github.oshai.kotlinlogging.KotlinLogging /** [JdbcAirbyteStreamFactory] implements [createGlobal] and [createNonGlobal] for JDBC sourcesx. */ interface JdbcAirbyteStreamFactory : AirbyteStreamFactory, MetaFieldDecorator { diff --git a/airbyte-cdk/bulk/toolkits/extract-jdbc/src/main/kotlin/io/airbyte/cdk/read/JdbcPartitionsCreatorFactory.kt b/airbyte-cdk/bulk/toolkits/extract-jdbc/src/main/kotlin/io/airbyte/cdk/read/JdbcPartitionsCreatorFactory.kt index c4bbdf4cbe4d9..994d7196d2f02 100644 --- a/airbyte-cdk/bulk/toolkits/extract-jdbc/src/main/kotlin/io/airbyte/cdk/read/JdbcPartitionsCreatorFactory.kt +++ b/airbyte-cdk/bulk/toolkits/extract-jdbc/src/main/kotlin/io/airbyte/cdk/read/JdbcPartitionsCreatorFactory.kt @@ -5,6 +5,7 @@ package io.airbyte.cdk.read import io.airbyte.cdk.jdbc.JDBC_PROPERTY_PREFIX +import io.github.oshai.kotlinlogging.KotlinLogging import io.micronaut.context.annotation.Requires import jakarta.inject.Singleton diff --git a/airbyte-integrations/connectors/source-mssql-v1/README.md b/airbyte-integrations/connectors/source-mssql-v1/README.md new file mode 100644 index 0000000000000..78a636b36e0f2 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql-v1/README.md @@ -0,0 +1,33 @@ +# MsSQL (SQL 
Server) Source + +## Performance Test + +To run performance tests on the command line: + +```shell +./gradlew :airbyte-integrations:connectors:source-mssql:performanceTest [--cpulimit=cpulimit/] [--memorylimit=memorylimit/] +``` + +In pull request: + +```shell +/test-performance connector=connectors/source-mssql [--cpulimit=cpulimit/] [--memorylimit=memorylimit/] +``` + +- `cpulimit`: Limit the number of CPUs. The minimum is `2`. E.g. `--cpulimit=cpulimit/2`. +- `memorylimit`: Limit the size of the memory. Must include the unit at the end (e.g. `MB`, `GB`). The minimum size is `6MB`. E.g. `--memorylimit=memorylimit/4GB`. +- When none of the CPU or memory limit is provided, the performance tests will run without memory or CPU limitations. The available resource will be bound by those specified in `ResourceRequirements.java`. + +### Use MsSQL script to populate the benchmark database + +In order to create a database with a certain number of tables, and a certain number of records in each of them, +you need to follow a few simple steps. + +1. Create a new database. +2. Follow the TODOs in [create_mssql_benchmarks.sql](src/test-performance/sql/create_mssql_benchmarks.sql) to change the number of tables, and the number of records of different sizes. +3. Execute the script with your changes for the new database. You can run the script with the MsSQL `sqlcmd` client: + ```bash + cd airbyte-integrations/connectors/source-mssql + sqlcmd -S Serverinstance -E -i src/test-performance/sql/create_mssql_benchmarks.sql + ``` +4. After the script finishes its work, you will receive the number of tables specified in the script, with names starting with **test_0** and ending with **test\_(the number of tables minus 1)**. 
diff --git a/airbyte-integrations/connectors/source-mssql-v1/acceptance-test-config.yml b/airbyte-integrations/connectors/source-mssql-v1/acceptance-test-config.yml new file mode 100644 index 0000000000000..706d04b5d0b94 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql-v1/acceptance-test-config.yml @@ -0,0 +1,9 @@ +# See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) +# for more information about how to configure these tests +connector_image: airbyte/source-mssql:dev +tests: + spec: + - spec_path: "src/test-integration/resources/expected_spec.json" + config_path: "src/test-integration/resources/dummy_config.json" + backward_compatibility_tests_config: + disable_for_version: "0.4.25" diff --git a/airbyte-integrations/connectors/source-mssql-v1/build.gradle b/airbyte-integrations/connectors/source-mssql-v1/build.gradle new file mode 100644 index 0000000000000..14b581fdd24d0 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql-v1/build.gradle @@ -0,0 +1,41 @@ +plugins { + id 'airbyte-java-connector' +} + +airbyteJavaConnector { + cdkVersionRequired = '0.45.1' + features = ['db-sources'] + useLocalCdk = false +} + +java { + // TODO: rewrite code to avoid javac warnings in the first place + compileJava { + options.compilerArgs += "-Xlint:-try,-rawtypes" + } + compileTestFixturesJava { + options.compilerArgs += "-Xlint:-this-escape" + } +} + +application { + mainClass = 'io.airbyte.integrations.source.mssql.MssqlSource' + applicationDefaultJvmArgs = ['-XX:+ExitOnOutOfMemoryError', '-XX:MaxRAMPercentage=75.0'] +} + +dependencies { + implementation 'com.microsoft.sqlserver:mssql-jdbc:12.6.1.jre11' + implementation 'io.debezium:debezium-embedded:2.7.1.Final' + implementation 'io.debezium:debezium-connector-sqlserver:2.6.2.Final' + implementation 'org.codehaus.plexus:plexus-utils:3.4.2' + + testFixturesImplementation 'org.testcontainers:mssqlserver:1.19.0' + + 
testImplementation 'org.awaitility:awaitility:4.2.0' + testImplementation 'org.hamcrest:hamcrest-all:1.3' + testImplementation 'org.testcontainers:mssqlserver:1.19.0' +} + +compileKotlin { + +} diff --git a/airbyte-integrations/connectors/source-mssql-v1/gradle.properties b/airbyte-integrations/connectors/source-mssql-v1/gradle.properties new file mode 100644 index 0000000000000..9e4d90aa65087 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql-v1/gradle.properties @@ -0,0 +1,2 @@ +testExecutionConcurrency=-1 +JunitMethodExecutionTimeout=5 m \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-mssql-v1/icon.svg b/airbyte-integrations/connectors/source-mssql-v1/icon.svg new file mode 100644 index 0000000000000..edcaeb77c8f22 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql-v1/icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-mssql-v1/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-mssql-v1/integration_tests/acceptance.py new file mode 100644 index 0000000000000..9e6409236281f --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql-v1/integration_tests/acceptance.py @@ -0,0 +1,16 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import pytest + +pytest_plugins = ("connector_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """This fixture is a placeholder for external resources that acceptance test might require.""" + # TODO: setup test dependencies if needed. 
otherwise remove the TODO comments + yield + # TODO: clean up test dependencies diff --git a/airbyte-integrations/connectors/source-mssql-v1/integration_tests/seed/basic.sql b/airbyte-integrations/connectors/source-mssql-v1/integration_tests/seed/basic.sql new file mode 100644 index 0000000000000..616bc1b2e8975 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql-v1/integration_tests/seed/basic.sql @@ -0,0 +1,228 @@ +CREATE + DATABASE MSSQL_BASIC; + +USE MSSQL_BASIC; + +CREATE + TABLE + dbo.TEST_DATASET( + id INTEGER PRIMARY KEY, + test_column_1 BIGINT, + test_column_10 FLOAT, + test_column_11 REAL, + test_column_12 DATE, + test_column_13 smalldatetime, + test_column_14 datetime, + test_column_15 datetime2, + test_column_16 TIME, + test_column_18 CHAR, + test_column_2 INT, + test_column_20 text, + test_column_21 nchar, + test_column_22 nvarchar(MAX), + test_column_23 ntext, + test_column_25 VARBINARY(3), + test_column_3 SMALLINT, + test_column_4 tinyint, + test_column_6 DECIMAL( + 5, + 2 + ), + test_column_7 NUMERIC + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 1, + - 9223372036854775808, + '123', + '123', + '0001-01-01', + '1900-01-01', + '1753-01-01', + '0001-01-01', + '13:00:01', + 'a', + - 2147483648, + 'a', + 'a', + 'a', + 'a', + CAST( + 'ABC' AS VARBINARY + ), + - 32768, + 0, + 999.33, + '99999' + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 2, + 9223372036854775807, + '1234567890.1234567', + '1234567890.1234567', + '9999-12-31', + '2079-06-06', + '9999-12-31', + '9999-12-31', + '13:00:04Z', + '*', + 2147483647, + 'abc', + '*', + 'abc', + 'abc', + CAST( + 'ABC' AS VARBINARY + ), + 32767, + 255, + 999.33, + '99999' + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 3, + 0, + '1234567890.1234567', + '1234567890.1234567', + '1999-01-08', + '2079-06-06', + '9999-12-31T13:00:04Z', + '9999-12-31T13:00:04.123456Z', + '13:00:04.123456Z', + '*', + 2147483647, + 'Some test text 123$%^&*()_', + N'ї', + N'Миші йдуть на південь, не питай чому;', 
+ N'Миші йдуть на південь, не питай чому;', + CAST( + 'ABC' AS VARBINARY + ), + 32767, + 255, + 999.33, + '99999' + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 4, + 0, + '1234567890.1234567', + '1234567890.1234567', + '1999-01-08', + '2079-06-06', + '9999-12-31T13:00:04.123Z', + '9999-12-31T13:00:04.123456Z', + '13:00:04.123456Z', + '*', + 2147483647, + '', + N'ї', + N'櫻花分店', + N'櫻花分店', + CAST( + 'ABC' AS VARBINARY + ), + 32767, + 255, + 999.33, + '99999' + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 5, + 0, + '1234567890.1234567', + '1234567890.1234567', + '1999-01-08', + '2079-06-06', + '9999-12-31T13:00:04.123Z', + '9999-12-31T13:00:04.123456Z', + '13:00:04.123456Z', + '*', + 2147483647, + '', + N'ї', + '', + '', + CAST( + 'ABC' AS VARBINARY + ), + 32767, + 255, + 999.33, + '99999' + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 6, + 0, + '1234567890.1234567', + '1234567890.1234567', + '1999-01-08', + '2079-06-06', + '9999-12-31T13:00:04.123Z', + '9999-12-31T13:00:04.123456Z', + '13:00:04.123456Z', + '*', + 2147483647, + '', + N'ї', + N'\xF0\x9F\x9A\x80', + N'\xF0\x9F\x9A\x80', + CAST( + 'ABC' AS VARBINARY + ), + 32767, + 255, + 999.33, + '99999' + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 7, + 0, + '1234567890.1234567', + '1234567890.1234567', + '1999-01-08', + '2079-06-06', + '9999-12-31T13:00:04.123Z', + '9999-12-31T13:00:04.123456Z', + '13:00:04.123456Z', + '*', + 2147483647, + '', + N'ї', + N'\xF0\x9F\x9A\x80', + N'\xF0\x9F\x9A\x80', + CAST( + 'ABC' AS VARBINARY + ), + 32767, + 255, + 999.33, + '99999' + ); diff --git a/airbyte-integrations/connectors/source-mssql-v1/integration_tests/seed/full.sql b/airbyte-integrations/connectors/source-mssql-v1/integration_tests/seed/full.sql new file mode 100644 index 0000000000000..9d7a8a920429e --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql-v1/integration_tests/seed/full.sql @@ -0,0 +1,320 @@ +CREATE + DATABASE MSSQL_FULL; + +USE MSSQL_FULL; + +CREATE + TABLE + 
dbo.TEST_DATASET( + id INTEGER PRIMARY KEY, + test_column_1 BIGINT, + test_column_10 FLOAT, + test_column_11 REAL, + test_column_12 DATE, + test_column_13 smalldatetime, + test_column_14 datetime, + test_column_15 datetime2, + test_column_16 TIME, + test_column_17 datetimeoffset, + test_column_18 CHAR, + test_column_19 VARCHAR(MAX) COLLATE Latin1_General_100_CI_AI_SC_UTF8, + test_column_2 INT, + test_column_20 text, + test_column_21 nchar, + test_column_22 nvarchar(MAX), + test_column_23 ntext, + test_column_24 BINARY, + test_column_25 VARBINARY(3), + test_column_26 geometry, + test_column_27 uniqueidentifier, + test_column_28 xml, + test_column_29 geography, + test_column_3 SMALLINT, + test_column_30 hierarchyid, + test_column_31 sql_variant, + test_column_4 tinyint, + test_column_5 bit, + test_column_6 DECIMAL( + 5, + 2 + ), + test_column_7 NUMERIC, + test_column_8 money, + test_column_9 smallmoney + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 1, + - 9223372036854775808, + '123', + '123', + '0001-01-01', + '1900-01-01', + '1753-01-01', + '0001-01-01', + NULL, + '0001-01-10 00:00:00 +01:00', + 'a', + 'a', + NULL, + 'a', + 'a', + 'a', + 'a', + CAST( + 'A' AS BINARY(1) + ), + CAST( + 'ABC' AS VARBINARY + ), + geometry::STGeomFromText( + 'LINESTRING (100 100, 20 180, 180 180)', + 0 + ), + '375CFC44-CAE3-4E43-8083-821D2DF0E626', + '1', + geography::STGeomFromText( + 'LINESTRING(-122.360 47.656, -122.343 47.656 )', + 4326 + ), + NULL, + '/1/1/', + 'a', + NULL, + NULL, + 999.33, + '99999', + NULL, + NULL + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 2, + 9223372036854775807, + '1234567890.1234567', + '1234567890.1234567', + '9999-12-31', + '2079-06-06', + '9999-12-31', + '9999-12-31', + '13:00:01', + '9999-01-10 00:00:00 +01:00', + '*', + 'abc', + - 2147483648, + 'abc', + '*', + 'abc', + 'abc', + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + - 32768, + NULL, + 'abc', + 0, + 0, + NULL, + NULL, + '9990000.3647', + '-214748.3648' + ); + +INSERT + INTO + 
dbo.TEST_DATASET + VALUES( + 3, + 0, + NULL, + NULL, + '1999-01-08', + NULL, + '9999-12-31T13:00:04Z', + '9999-12-31T13:00:04.123456Z', + '13:00:04Z', + NULL, + NULL, + N'Миші йдуть на південь, не питай чому;', + 2147483647, + 'Some test text 123$%^&*()_', + N'ї', + N'Миші йдуть на південь, не питай чому;', + N'Миші йдуть на південь, не питай чому;', + NULL, + NULL, + NULL, + NULL, + '', + NULL, + 32767, + NULL, + N'Миші йдуть на південь, не питай чому;', + 255, + 1, + NULL, + NULL, + NULL, + 214748.3647 + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 4, + NULL, + NULL, + NULL, + NULL, + NULL, + '9999-12-31T13:00:04.123Z', + NULL, + '13:00:04.123456Z', + NULL, + NULL, + N'櫻花分店', + NULL, + '', + NULL, + N'櫻花分店', + N'櫻花分店', + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + N'櫻花分店', + NULL, + 'true', + NULL, + NULL, + NULL, + NULL + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 5, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + '', + NULL, + NULL, + NULL, + '', + '', + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + '', + NULL, + 'false', + NULL, + NULL, + NULL, + NULL + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 6, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 7, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + N'\xF0\x9F\x9A\x80', + NULL, + NULL, + NULL, + N'\xF0\x9F\x9A\x80', + N'\xF0\x9F\x9A\x80', + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + N'\xF0\x9F\x9A\x80', + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + ); diff --git a/airbyte-integrations/connectors/source-mssql-v1/integration_tests/seed/full_without_nulls.sql 
b/airbyte-integrations/connectors/source-mssql-v1/integration_tests/seed/full_without_nulls.sql new file mode 100644 index 0000000000000..2b6483f9e5694 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql-v1/integration_tests/seed/full_without_nulls.sql @@ -0,0 +1,380 @@ +CREATE + DATABASE MSSQL_FULL_NN; + +USE MSSQL_FULL_NN; + +CREATE + TABLE + dbo.TEST_DATASET( + id INTEGER PRIMARY KEY, + test_column_1 BIGINT, + test_column_10 FLOAT, + test_column_11 REAL, + test_column_12 DATE, + test_column_13 smalldatetime, + test_column_14 datetime, + test_column_15 datetime2, + test_column_16 TIME, + test_column_17 datetimeoffset, + test_column_18 CHAR, + test_column_19 VARCHAR(MAX) COLLATE Latin1_General_100_CI_AI_SC_UTF8, + test_column_2 INT, + test_column_20 text, + test_column_21 nchar, + test_column_22 nvarchar(MAX), + test_column_23 ntext, + test_column_24 BINARY, + test_column_25 VARBINARY(3), + test_column_26 geometry, + test_column_27 uniqueidentifier, + test_column_28 xml, + test_column_29 geography, + test_column_3 SMALLINT, + test_column_30 hierarchyid, + test_column_31 sql_variant, + test_column_4 tinyint, + test_column_5 bit, + test_column_6 DECIMAL( + 5, + 2 + ), + test_column_7 NUMERIC, + test_column_8 money, + test_column_9 smallmoney + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 1, + - 9223372036854775808, + '123', + '123', + '0001-01-01', + '1900-01-01', + '1753-01-01', + '0001-01-01', + '13:00:01', + '0001-01-10 00:00:00 +01:00', + 'a', + 'a', + - 2147483648, + 'a', + 'a', + 'a', + 'a', + CAST( + 'A' AS BINARY(1) + ), + CAST( + 'ABC' AS VARBINARY + ), + geometry::STGeomFromText( + 'LINESTRING (100 100, 20 180, 180 180)', + 0 + ), + '375CFC44-CAE3-4E43-8083-821D2DF0E626', + '1', + geography::STGeomFromText( + 'LINESTRING(-122.360 47.656, -122.343 47.656 )', + 4326 + ), + - 32768, + '/1/1/', + 'a', + 0, + 0, + 999.33, + '99999', + '9990000.3647', + '-214748.3648' + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 2, + 
9223372036854775807, + '1234567890.1234567', + '1234567890.1234567', + '9999-12-31', + '2079-06-06', + '9999-12-31', + '9999-12-31', + '13:00:04Z', + '9999-01-10 00:00:00 +01:00', + '*', + 'abc', + 2147483647, + 'abc', + '*', + 'abc', + 'abc', + CAST( + 'A' AS BINARY(1) + ), + CAST( + 'ABC' AS VARBINARY + ), + geometry::STGeomFromText( + 'LINESTRING (100 100, 20 180, 180 180)', + 0 + ), + '375CFC44-CAE3-4E43-8083-821D2DF0E626', + '', + geography::STGeomFromText( + 'LINESTRING(-122.360 47.656, -122.343 47.656 )', + 4326 + ), + 32767, + '/1/1/', + 'abc', + 255, + 1, + 999.33, + '99999', + '9990000.3647', + 214748.3647 + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 3, + 0, + '1234567890.1234567', + '1234567890.1234567', + '1999-01-08', + '2079-06-06', + '9999-12-31T13:00:04Z', + '9999-12-31T13:00:04.123456Z', + '13:00:04.123456Z', + '9999-01-10 00:00:00 +01:00', + '*', + N'Миші йдуть на південь, не питай чому;', + 2147483647, + 'Some test text 123$%^&*()_', + N'ї', + N'Миші йдуть на південь, не питай чому;', + N'Миші йдуть на південь, не питай чому;', + CAST( + 'A' AS BINARY(1) + ), + CAST( + 'ABC' AS VARBINARY + ), + geometry::STGeomFromText( + 'LINESTRING (100 100, 20 180, 180 180)', + 0 + ), + '375CFC44-CAE3-4E43-8083-821D2DF0E626', + '', + geography::STGeomFromText( + 'LINESTRING(-122.360 47.656, -122.343 47.656 )', + 4326 + ), + 32767, + '/1/1/', + N'Миші йдуть на південь, не питай чому;', + 255, + 'true', + 999.33, + '99999', + '9990000.3647', + 214748.3647 + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 4, + 0, + '1234567890.1234567', + '1234567890.1234567', + '1999-01-08', + '2079-06-06', + '9999-12-31T13:00:04.123Z', + '9999-12-31T13:00:04.123456Z', + '13:00:04.123456Z', + '9999-01-10 00:00:00 +01:00', + '*', + N'櫻花分店', + 2147483647, + '', + N'ї', + N'櫻花分店', + N'櫻花分店', + CAST( + 'A' AS BINARY(1) + ), + CAST( + 'ABC' AS VARBINARY + ), + geometry::STGeomFromText( + 'LINESTRING (100 100, 20 180, 180 180)', + 0 + ), + 
'375CFC44-CAE3-4E43-8083-821D2DF0E626', + '', + geography::STGeomFromText( + 'LINESTRING(-122.360 47.656, -122.343 47.656 )', + 4326 + ), + 32767, + '/1/1/', + N'櫻花分店', + 255, + 'false', + 999.33, + '99999', + '9990000.3647', + 214748.3647 + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 5, + 0, + '1234567890.1234567', + '1234567890.1234567', + '1999-01-08', + '2079-06-06', + '9999-12-31T13:00:04.123Z', + '9999-12-31T13:00:04.123456Z', + '13:00:04.123456Z', + '9999-01-10 00:00:00 +01:00', + '*', + '', + 2147483647, + '', + N'ї', + '', + '', + CAST( + 'A' AS BINARY(1) + ), + CAST( + 'ABC' AS VARBINARY + ), + geometry::STGeomFromText( + 'LINESTRING (100 100, 20 180, 180 180)', + 0 + ), + '375CFC44-CAE3-4E43-8083-821D2DF0E626', + '', + geography::STGeomFromText( + 'LINESTRING(-122.360 47.656, -122.343 47.656 )', + 4326 + ), + 32767, + '/1/1/', + '', + 255, + 'false', + 999.33, + '99999', + '9990000.3647', + 214748.3647 + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 6, + 0, + '1234567890.1234567', + '1234567890.1234567', + '1999-01-08', + '2079-06-06', + '9999-12-31T13:00:04.123Z', + '9999-12-31T13:00:04.123456Z', + '13:00:04.123456Z', + '9999-01-10 00:00:00 +01:00', + '*', + N'\xF0\x9F\x9A\x80', + 2147483647, + '', + N'ї', + N'\xF0\x9F\x9A\x80', + N'\xF0\x9F\x9A\x80', + CAST( + 'A' AS BINARY(1) + ), + CAST( + 'ABC' AS VARBINARY + ), + geometry::STGeomFromText( + 'LINESTRING (100 100, 20 180, 180 180)', + 0 + ), + '375CFC44-CAE3-4E43-8083-821D2DF0E626', + '', + geography::STGeomFromText( + 'LINESTRING(-122.360 47.656, -122.343 47.656 )', + 4326 + ), + 32767, + '/1/1/', + N'\xF0\x9F\x9A\x80', + 255, + 'false', + 999.33, + '99999', + '9990000.3647', + 214748.3647 + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 7, + 0, + '1234567890.1234567', + '1234567890.1234567', + '1999-01-08', + '2079-06-06', + '9999-12-31T13:00:04.123Z', + '9999-12-31T13:00:04.123456Z', + '13:00:04.123456Z', + '9999-01-10 00:00:00 +01:00', + '*', + N'\xF0\x9F\x9A\x80', + 2147483647, + 
'', + N'ї', + N'\xF0\x9F\x9A\x80', + N'\xF0\x9F\x9A\x80', + CAST( + 'A' AS BINARY(1) + ), + CAST( + 'ABC' AS VARBINARY + ), + geometry::STGeomFromText( + 'LINESTRING (100 100, 20 180, 180 180)', + 0 + ), + '375CFC44-CAE3-4E43-8083-821D2DF0E626', + '', + geography::STGeomFromText( + 'LINESTRING(-122.360 47.656, -122.343 47.656 )', + 4326 + ), + 32767, + '/1/1/', + N'\xF0\x9F\x9A\x80', + 255, + 'false', + 999.33, + '99999', + '9990000.3647', + 214748.3647 + ); diff --git a/airbyte-integrations/connectors/source-mssql-v1/metadata.yaml b/airbyte-integrations/connectors/source-mssql-v1/metadata.yaml new file mode 100644 index 0000000000000..62f33acd02e8b --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql-v1/metadata.yaml @@ -0,0 +1,66 @@ +data: + ab_internal: + ql: 200 + sl: 100 + allowedHosts: + hosts: + - ${host} + - ${tunnel_method.tunnel_host} + connectorSubtype: database + connectorType: source + definitionId: b5ea17b1-f170-46dc-bc31-cc744ca984c1 + dockerImageTag: 4.1.16 + dockerRepository: airbyte/source-mssql + documentationUrl: https://docs.airbyte.com/integrations/sources/mssql + githubIssueLabel: source-mssql + icon: mssql.svg + license: ELv2 + maxSecondsBetweenMessages: 7200 + name: Microsoft SQL Server (MSSQL) + registryOverrides: + cloud: + enabled: true + oss: + enabled: true + releaseStage: generally_available + supportLevel: certified + tags: + - language:java + releases: + breakingChanges: + 4.0.0: + message: "We have overhauled our MSSQL source connector and it is now supported by the Airbyte team! To benefit from new features, including terabyte-sized table support, reliability improvements, expanded datetime data types, and various bug fixes, please opt in to the 4.0.0 version." + upgradeDeadline: "2024-04-07" + 3.0.0: + message: "Remapped columns of types: date, datetime, datetime2, datetimeoffset, smalldatetime, and time from `String` to their appropriate Airbyte types. 
Customers whose streams have columns with the affected data types must take action with their connections." + upgradeDeadline: "2023-12-07" + 2.0.0: + message: "Add default cursor for cdc" + upgradeDeadline: "2023-08-23" + connectorTestSuitesOptions: + - suite: unitTests + - suite: integrationTests + testSecrets: + - name: SECRET_SOURCE-MSSQL__CREDS + fileName: config.json + secretStore: + type: GSM + alias: airbyte-connector-testing-secret-store + - name: SECRET_SOURCE_MSSQL_PERFORMANCE_TEST_CREDS + fileName: performance-config.json + secretStore: + type: GSM + alias: airbyte-connector-testing-secret-store + - suite: acceptanceTests + testSecrets: + - name: SECRET_SOURCE-MSSQL__CREDS + fileName: config.json + secretStore: + type: GSM + alias: airbyte-connector-testing-secret-store + - name: SECRET_SOURCE_MSSQL_PERFORMANCE_TEST_CREDS + fileName: performance-config.json + secretStore: + type: GSM + alias: airbyte-connector-testing-secret-store +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MsSqlSpecConstants.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MsSqlSpecConstants.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MsSqlSpecConstants.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MsSqlSpecConstants.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcConnectorMetadataInjector.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcConnectorMetadataInjector.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcConnectorMetadataInjector.java rename to 
airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcConnectorMetadataInjector.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcHelper.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcHelper.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcHelper.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcHelper.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcSavedInfoFetcher.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcSavedInfoFetcher.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcSavedInfoFetcher.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcSavedInfoFetcher.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcStateHandler.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcStateHandler.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcStateHandler.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcStateHandler.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcTargetPosition.java 
b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcTargetPosition.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcTargetPosition.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcTargetPosition.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlDebeziumConverter.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlDebeziumConverter.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlDebeziumConverter.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlDebeziumConverter.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlQueryUtils.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlQueryUtils.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlQueryUtils.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlQueryUtils.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java rename to 
airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSourceOperations.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlSourceOperations.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSourceOperations.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlSourceOperations.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/cdc/MssqlCdcStateConstants.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/cdc/MssqlCdcStateConstants.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/cdc/MssqlCdcStateConstants.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/cdc/MssqlCdcStateConstants.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/cdc/MssqlDebeziumStateUtil.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/cdc/MssqlDebeziumStateUtil.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/cdc/MssqlDebeziumStateUtil.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/cdc/MssqlDebeziumStateUtil.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/cursor_based/MssqlCursorBasedStateManager.java 
b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/cursor_based/MssqlCursorBasedStateManager.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/cursor_based/MssqlCursorBasedStateManager.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/cursor_based/MssqlCursorBasedStateManager.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/initialsync/CdcMetadataInjector.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/initialsync/CdcMetadataInjector.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/initialsync/CdcMetadataInjector.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/initialsync/CdcMetadataInjector.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadGlobalStateManager.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadGlobalStateManager.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadGlobalStateManager.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadGlobalStateManager.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadHandler.java 
b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadHandler.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadHandler.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadHandler.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadRecordIterator.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadRecordIterator.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadRecordIterator.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadRecordIterator.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadStateManager.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadStateManager.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadStateManager.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadStateManager.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadStreamStateManager.java 
b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadStreamStateManager.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadStreamStateManager.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadStreamStateManager.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialReadUtil.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialReadUtil.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialReadUtil.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialReadUtil.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MSSqlSourceExceptionHandler.kt b/airbyte-integrations/connectors/source-mssql-v1/src/main/kotlin/io/airbyte/integrations/source/mssql/MSSqlSourceExceptionHandler.kt similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MSSqlSourceExceptionHandler.kt rename to airbyte-integrations/connectors/source-mssql-v1/src/main/kotlin/io/airbyte/integrations/source/mssql/MSSqlSourceExceptionHandler.kt diff --git a/airbyte-integrations/connectors/source-mssql/src/main/resources/spec.json b/airbyte-integrations/connectors/source-mssql-v1/src/main/resources/spec.json similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/resources/spec.json rename to 
airbyte-integrations/connectors/source-mssql-v1/src/main/resources/spec.json diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractMssqlSourceDatatypeTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractMssqlSourceDatatypeTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractMssqlSourceDatatypeTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractMssqlSourceDatatypeTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractSshMssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractSshMssqlSourceAcceptanceTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractSshMssqlSourceAcceptanceTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractSshMssqlSourceAcceptanceTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceAcceptanceTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceAcceptanceTest.java rename to 
airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceAcceptanceTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceDatatypeTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceDatatypeTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceDatatypeTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceDatatypeTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CloudDeploymentSslEnabledMssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/CloudDeploymentSslEnabledMssqlSourceAcceptanceTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CloudDeploymentSslEnabledMssqlSourceAcceptanceTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/CloudDeploymentSslEnabledMssqlSourceAcceptanceTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceAcceptanceTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceAcceptanceTest.java rename to 
airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceAcceptanceTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceDatatypeTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceDatatypeTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceDatatypeTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceDatatypeTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceOperationsTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceOperationsTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceOperationsTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceOperationsTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/SshKeyMssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/SshKeyMssqlSourceAcceptanceTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/SshKeyMssqlSourceAcceptanceTest.java rename to 
airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/SshKeyMssqlSourceAcceptanceTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/SshPasswordMssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/SshPasswordMssqlSourceAcceptanceTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/SshPasswordMssqlSourceAcceptanceTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/SshPasswordMssqlSourceAcceptanceTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/SslEnabledMssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/SslEnabledMssqlSourceAcceptanceTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/SslEnabledMssqlSourceAcceptanceTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/SslEnabledMssqlSourceAcceptanceTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/resources/dummy_config.json b/airbyte-integrations/connectors/source-mssql-v1/src/test-integration/resources/dummy_config.json similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-integration/resources/dummy_config.json rename to airbyte-integrations/connectors/source-mssql-v1/src/test-integration/resources/dummy_config.json diff --git 
a/airbyte-integrations/connectors/source-mssql/src/test-integration/resources/expected_spec.json b/airbyte-integrations/connectors/source-mssql-v1/src/test-integration/resources/expected_spec.json similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-integration/resources/expected_spec.json rename to airbyte-integrations/connectors/source-mssql-v1/src/test-integration/resources/expected_spec.json diff --git a/airbyte-integrations/connectors/source-mssql/src/test-performance/java/io/airbyte/integrations/source/mssql/FillMsSqlTestDbScriptTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test-performance/java/io/airbyte/integrations/source/mssql/FillMsSqlTestDbScriptTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-performance/java/io/airbyte/integrations/source/mssql/FillMsSqlTestDbScriptTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test-performance/java/io/airbyte/integrations/source/mssql/FillMsSqlTestDbScriptTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test-performance/java/io/airbyte/integrations/source/mssql/MssqlSourcePerformanceTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test-performance/java/io/airbyte/integrations/source/mssql/MssqlSourcePerformanceTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-performance/java/io/airbyte/integrations/source/mssql/MssqlSourcePerformanceTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test-performance/java/io/airbyte/integrations/source/mssql/MssqlSourcePerformanceTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test-performance/sql/create_mssql_benchmarks.sql b/airbyte-integrations/connectors/source-mssql-v1/src/test-performance/sql/create_mssql_benchmarks.sql similarity index 100% rename from 
airbyte-integrations/connectors/source-mssql/src/test-performance/sql/create_mssql_benchmarks.sql rename to airbyte-integrations/connectors/source-mssql-v1/src/test-performance/sql/create_mssql_benchmarks.sql diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSslSourceTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSslSourceTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSslSourceTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSslSourceTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CdcStateCompressionTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/CdcStateCompressionTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CdcStateCompressionTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/CdcStateCompressionTest.java diff --git 
a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CloudDeploymentMssqlTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/CloudDeploymentMssqlTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CloudDeploymentMssqlTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/CloudDeploymentMssqlTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlAgentStateTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlAgentStateTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlAgentStateTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlAgentStateTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlCdcHelperTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlCdcHelperTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlCdcHelperTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlCdcHelperTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlDataSourceFactoryTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlDataSourceFactoryTest.java similarity index 100% rename from 
airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlDataSourceFactoryTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlDataSourceFactoryTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlDebeziumStateUtilTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlDebeziumStateUtilTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlDebeziumStateUtilTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlDebeziumStateUtilTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlInitialLoadHandlerTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlInitialLoadHandlerTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlInitialLoadHandlerTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlInitialLoadHandlerTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlJdbcSourceAcceptanceTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlJdbcSourceAcceptanceTest.java rename to 
airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlJdbcSourceAcceptanceTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlSourceTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlSourceTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlSourceTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlSourceTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlSslSourceTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlSslSourceTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlSslSourceTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlSslSourceTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlStressTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlStressTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlStressTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlStressTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLContainerFactory.java 
b/airbyte-integrations/connectors/source-mssql-v1/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLContainerFactory.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLContainerFactory.java rename to airbyte-integrations/connectors/source-mssql-v1/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLContainerFactory.java diff --git a/airbyte-integrations/connectors/source-mssql/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLTestDatabase.java b/airbyte-integrations/connectors/source-mssql-v1/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLTestDatabase.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLTestDatabase.java rename to airbyte-integrations/connectors/source-mssql-v1/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLTestDatabase.java diff --git a/airbyte-integrations/connectors/source-mssql/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSqlTestDatabaseWithBackgroundThreads.java b/airbyte-integrations/connectors/source-mssql-v1/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSqlTestDatabaseWithBackgroundThreads.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSqlTestDatabaseWithBackgroundThreads.java rename to airbyte-integrations/connectors/source-mssql-v1/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSqlTestDatabaseWithBackgroundThreads.java diff --git a/airbyte-integrations/connectors/source-mssql/build.gradle b/airbyte-integrations/connectors/source-mssql/build.gradle index 14b581fdd24d0..1c1e57ce977a4 100644 --- a/airbyte-integrations/connectors/source-mssql/build.gradle +++ b/airbyte-integrations/connectors/source-mssql/build.gradle @@ -1,26 +1,15 @@ 
plugins { - id 'airbyte-java-connector' + id 'airbyte-bulk-connector' } -airbyteJavaConnector { - cdkVersionRequired = '0.45.1' - features = ['db-sources'] - useLocalCdk = false -} - -java { - // TODO: rewrite code to avoid javac warnings in the first place - compileJava { - options.compilerArgs += "-Xlint:-try,-rawtypes" - } - compileTestFixturesJava { - options.compilerArgs += "-Xlint:-this-escape" - } +application { + mainClass = 'io.airbyte.integrations.source.mssql.MsSqlServerSource' } -application { - mainClass = 'io.airbyte.integrations.source.mssql.MssqlSource' - applicationDefaultJvmArgs = ['-XX:+ExitOnOutOfMemoryError', '-XX:MaxRAMPercentage=75.0'] +airbyteBulkConnector { + core = 'extract' + toolkits = ['extract-jdbc', 'extract-cdc'] + cdk = 'local' } dependencies { @@ -28,12 +17,15 @@ dependencies { implementation 'io.debezium:debezium-embedded:2.7.1.Final' implementation 'io.debezium:debezium-connector-sqlserver:2.6.2.Final' implementation 'org.codehaus.plexus:plexus-utils:3.4.2' + api 'org.apache.commons:commons-lang3:3.17.0' + implementation 'org.apache.commons:commons-lang3:3.17.0' testFixturesImplementation 'org.testcontainers:mssqlserver:1.19.0' testImplementation 'org.awaitility:awaitility:4.2.0' testImplementation 'org.hamcrest:hamcrest-all:1.3' testImplementation 'org.testcontainers:mssqlserver:1.19.0' + testImplementation("io.mockk:mockk:1.12.0") } compileKotlin { diff --git a/airbyte-integrations/connectors/source-mssql/metadata.yaml b/airbyte-integrations/connectors/source-mssql/metadata.yaml index 62f33acd02e8b..c827d349b987e 100644 --- a/airbyte-integrations/connectors/source-mssql/metadata.yaml +++ b/airbyte-integrations/connectors/source-mssql/metadata.yaml @@ -39,18 +39,6 @@ data: upgradeDeadline: "2023-08-23" connectorTestSuitesOptions: - suite: unitTests - - suite: integrationTests - testSecrets: - - name: SECRET_SOURCE-MSSQL__CREDS - fileName: config.json - secretStore: - type: GSM - alias: airbyte-connector-testing-secret-store - - 
name: SECRET_SOURCE_MSSQL_PERFORMANCE_TEST_CREDS - fileName: performance-config.json - secretStore: - type: GSM - alias: airbyte-connector-testing-secret-store - suite: acceptanceTests testSecrets: - name: SECRET_SOURCE-MSSQL__CREDS diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/MSSqlSourceExceptionHandler.kt b/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/MSSqlSourceExceptionHandler.kt new file mode 100644 index 0000000000000..24f3d588285d1 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/MSSqlSourceExceptionHandler.kt @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2024 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql + +import io.airbyte.cdk.integrations.util.ConnectorErrorProfile +import io.airbyte.cdk.integrations.util.ConnectorExceptionHandler +import io.airbyte.cdk.integrations.util.FailureType + +class MSSqlSourceExceptionHandler : ConnectorExceptionHandler() { + override fun initializeErrorDictionary() { + + val DATABASE_READ_ERROR = "Encountered an error while reading the database, will retry" + + // include common error profiles + super.initializeErrorDictionary() + + // adding connector specific error profiles + add( + ConnectorErrorProfile( + errorClass = "MS SQL Exception", // which should we use? + regexMatchingPattern = + ".*returned an incomplete response. The connection has been closed.*", + failureType = FailureType.TRANSIENT, + externalMessage = DATABASE_READ_ERROR, + sampleInternalMessage = + "com.microsoft.sqlserver.jdbc.SQLServerException: SQL Server returned an incomplete response. 
The connection has been closed.", + referenceLinks = listOf("https://github.com/airbytehq/oncall/issues/6623") + ) + ) + } +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/MsSqlSpecConstants.java b/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/MsSqlSpecConstants.java new file mode 100644 index 0000000000000..e5e6eca91f416 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/MsSqlSpecConstants.java @@ -0,0 +1,15 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql; + +// Constants defined in +// airbyte-integrations/connectors/source-mssql/src/main/resources/spec.json. +public class MsSqlSpecConstants { + + public static final String INVALID_CDC_CURSOR_POSITION_PROPERTY = "invalid_cdc_cursor_position_behavior"; + public static final String FAIL_SYNC_OPTION = "Fail sync"; + public static final String RESYNC_DATA_OPTION = "Re-sync data"; + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/MssqlCdcConnectorMetadataInjector.java b/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/MssqlCdcConnectorMetadataInjector.java new file mode 100644 index 0000000000000..717d470a83902 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/MssqlCdcConnectorMetadataInjector.java @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mssql; + +import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter.CDC_DELETED_AT; +import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter.CDC_UPDATED_AT; +import static io.airbyte.integrations.source.mssql.MssqlSource.CDC_DEFAULT_CURSOR; +import static io.airbyte.integrations.source.mssql.MssqlSource.CDC_EVENT_SERIAL_NO; +import static io.airbyte.integrations.source.mssql.MssqlSource.CDC_LSN; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import io.airbyte.cdk.integrations.debezium.CdcMetadataInjector; +import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil.MssqlDebeziumStateAttributes; +import java.time.Instant; +import java.util.concurrent.atomic.AtomicLong; + +public class MssqlCdcConnectorMetadataInjector implements CdcMetadataInjector { + + private final long emittedAtConverted; + + // This now makes this class stateful. 
Please make sure to use the same instance within a sync + private final AtomicLong recordCounter = new AtomicLong(1); + private static final long ONE_HUNDRED_MILLION = 100_000_000; + private static MssqlCdcConnectorMetadataInjector mssqlCdcConnectorMetadataInjector; + + private MssqlCdcConnectorMetadataInjector(final Instant emittedAt) { + this.emittedAtConverted = emittedAt.getEpochSecond() * ONE_HUNDRED_MILLION; + } + + public static MssqlCdcConnectorMetadataInjector getInstance(final Instant emittedAt) { + if (mssqlCdcConnectorMetadataInjector == null) { + mssqlCdcConnectorMetadataInjector = new MssqlCdcConnectorMetadataInjector(emittedAt); + } + + return mssqlCdcConnectorMetadataInjector; + } + + @Override + public void addMetaData(final ObjectNode event, final JsonNode source) { + final String commitLsn = source.get("commit_lsn").asText(); + final String eventSerialNo = source.get("event_serial_no").asText(); + event.put(CDC_LSN, commitLsn); + event.put(CDC_EVENT_SERIAL_NO, eventSerialNo); + event.put(CDC_DEFAULT_CURSOR, getCdcDefaultCursor()); + } + + @Override + public void addMetaDataToRowsFetchedOutsideDebezium(final ObjectNode record, + final String transactionTimestamp, + final MssqlDebeziumStateAttributes debeziumStateAttributes) { + record.put(CDC_UPDATED_AT, transactionTimestamp); + record.put(CDC_EVENT_SERIAL_NO, 1); + record.put(CDC_LSN, debeziumStateAttributes.lsn().toString()); + record.put(CDC_DELETED_AT, (String) null); + record.put(CDC_DEFAULT_CURSOR, getCdcDefaultCursor()); + } + + @Override + public String namespace(final JsonNode source) { + return source.get("schema").asText(); + } + + @Override + public String name(JsonNode source) { + return source.get("table").asText(); + } + + private Long getCdcDefaultCursor() { + return this.emittedAtConverted + this.recordCounter.getAndIncrement(); + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/MssqlCdcHelper.java 
/*
 * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql;

import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.annotations.VisibleForTesting;
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
import io.airbyte.cdk.db.jdbc.JdbcUtils;
import io.airbyte.protocol.models.v0.AirbyteStream;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
import io.airbyte.protocol.models.v0.SyncMode;
import java.time.Duration;
import java.util.Properties;
import java.util.stream.Collectors;
import org.codehaus.plexus.util.StringUtils;

/**
 * Helpers for interpreting the connector's replication configuration and for building the Debezium
 * properties used by the SQL Server connector.
 */
public class MssqlCdcHelper {

  // legacy replication method config before version 0.4.0
  // it is an enum with possible values: STANDARD and CDC
  private static final String LEGACY_REPLICATION_FIELD = "replication_method";
  // new replication method config since version 0.4.0
  // it is an oneOf object
  private static final String REPLICATION_FIELD = "replication";
  private static final String REPLICATION_TYPE_FIELD = "replication_type";
  private static final String METHOD_FIELD = "method";

  private static final Duration HEARTBEAT_INTERVAL = Duration.ofSeconds(10L);

  // Test execution latency is lower when heartbeats are more frequent.
  private static final Duration HEARTBEAT_INTERVAL_IN_TESTS = Duration.ofSeconds(1L);

  public enum ReplicationMethod {
    STANDARD,
    CDC
  }

  /**
   * Returns true when the supplied connector config requests CDC replication, accepting all three
   * historical config shapes: "replication_method" as an object (with a "method" key),
   * "replication_method" as a plain string, and "replication" as an object (with a
   * "replication_type" key).
   */
  @VisibleForTesting
  static boolean isCdc(final JsonNode config) {
    if (config == null) {
      return false;
    }
    // new replication method config since version 0.4.0
    final JsonNode methodNode = config.get(LEGACY_REPLICATION_FIELD);
    if (config.hasNonNull(LEGACY_REPLICATION_FIELD) && methodNode.isObject()) {
      return ReplicationMethod.valueOf(methodNode.get(METHOD_FIELD).asText()) == ReplicationMethod.CDC;
    }
    // legacy replication method config before version 0.4.0
    if (config.hasNonNull(LEGACY_REPLICATION_FIELD) && methodNode.isTextual()) {
      return ReplicationMethod.valueOf(methodNode.asText()) == ReplicationMethod.CDC;
    }
    if (config.hasNonNull(REPLICATION_FIELD)) {
      final JsonNode replicationNode = config.get(REPLICATION_FIELD);
      return ReplicationMethod.valueOf(replicationNode.get(REPLICATION_TYPE_FIELD).asText()) == ReplicationMethod.CDC;
    }
    return false;
  }

  /**
   * Assembles the Debezium properties for a SQL Server CDC sync.
   *
   * @param database source database handle; its source config drives SSL and heartbeat settings
   * @param catalog configured catalog; incremental streams determine {@code schema.include.list}
   * @param isSnapshot whether this run only snapshots newly added streams
   */
  public static Properties getDebeziumProperties(final JdbcDatabase database, final ConfiguredAirbyteCatalog catalog, final boolean isSnapshot) {
    final JsonNode sourceConfig = database.getSourceConfig();
    final JsonNode dbConfig = database.getDatabaseConfig();

    final Properties properties = new Properties();
    properties.setProperty("connector.class", "io.debezium.connector.sqlserver.SqlServerConnector");

    // https://debezium.io/documentation/reference/2.2/connectors/sqlserver.html#sqlserver-property-include-schema-changes
    properties.setProperty("include.schema.changes", "false");
    // https://debezium.io/documentation/reference/2.2/connectors/sqlserver.html#sqlserver-property-provide-transaction-metadata
    properties.setProperty("provide.transaction.metadata", "false");

    properties.setProperty("converters", "mssql_converter");
    properties.setProperty("mssql_converter.type", MssqlDebeziumConverter.class.getName());

    // If new stream(s) are added after a previously successful sync,
    // the snapshot.mode needs to be initial_only since we don't want to continue streaming changes
    // https://debezium.io/documentation/reference/stable/connectors/sqlserver.html#sqlserver-property-snapshot-mode
    if (isSnapshot) {
      properties.setProperty("snapshot.mode", "initial_only");
    } else {
      // Outside snapshot mode, "when_needed" takes a fresh snapshot if the transaction log has been
      // rotated out, and then continues by streaming changes from the transaction log.
      properties.setProperty("snapshot.mode", "when_needed");
    }

    properties.setProperty("snapshot.isolation.mode", "read_committed");

    properties.setProperty("schema.include.list", getSchema(catalog));
    properties.setProperty("database.names", sourceConfig.get(JdbcUtils.DATABASE_KEY).asText());

    final boolean isTestRun = sourceConfig.has("is_test") && sourceConfig.get("is_test").asBoolean();
    final Duration heartbeatInterval = isTestRun ? HEARTBEAT_INTERVAL_IN_TESTS : HEARTBEAT_INTERVAL;
    properties.setProperty("heartbeat.interval.ms", Long.toString(heartbeatInterval.toMillis()));

    if (sourceConfig.has("ssl_method")) {
      final JsonNode sslMethodNode = sourceConfig.get("ssl_method");
      final String sslMethod = sslMethodNode.get("ssl_method").asText();
      switch (sslMethod) {
        case "unencrypted" -> {
          properties.setProperty("database.encrypt", "false");
          properties.setProperty("driver.trustServerCertificate", "true");
        }
        case "encrypted_trust_server_certificate" -> {
          properties.setProperty("driver.encrypt", "true");
          properties.setProperty("driver.trustServerCertificate", "true");
        }
        case "encrypted_verify_certificate" -> {
          properties.setProperty("driver.encrypt", "true");
          properties.setProperty("driver.trustServerCertificate", "false");
          if (dbConfig.has("trustStore") && !dbConfig.get("trustStore").asText().isEmpty()) {
            properties.setProperty("database.trustStore", dbConfig.get("trustStore").asText());
          }
          if (dbConfig.has("trustStorePassword") && !dbConfig.get("trustStorePassword").asText().isEmpty()) {
            properties.setProperty("database.trustStorePassword", dbConfig.get("trustStorePassword").asText());
          }
          if (dbConfig.has("hostNameInCertificate") && !dbConfig.get("hostNameInCertificate").asText().isEmpty()) {
            properties.setProperty("database.hostNameInCertificate", dbConfig.get("hostNameInCertificate").asText());
          }
        }
        default -> {
          // Unknown ssl_method values fall through with no extra driver properties.
        }
      }
    } else {
      properties.setProperty("driver.trustServerCertificate", "true");
    }

    return properties;
  }

  /**
   * Builds the comma-separated list of schemas for incremental streams; commas inside schema names
   * are escaped because Debezium splits this property on commas.
   */
  private static String getSchema(final ConfiguredAirbyteCatalog catalog) {
    return catalog.getStreams().stream()
        .filter(stream -> stream.getSyncMode() == SyncMode.INCREMENTAL)
        .map(ConfiguredAirbyteStream::getStream)
        .map(AirbyteStream::getNamespace)
        // debezium needs commas escaped to split properly
        .map(namespace -> StringUtils.escape(namespace, new char[] {','}, "\\,"))
        .collect(Collectors.joining(","));
  }

}
/*
 * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql;

import static io.airbyte.integrations.source.mssql.MssqlSource.IS_COMPRESSED;
import static io.airbyte.integrations.source.mssql.MssqlSource.MSSQL_CDC_OFFSET;
import static io.airbyte.integrations.source.mssql.MssqlSource.MSSQL_DB_HISTORY;

import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.cdk.integrations.debezium.CdcSavedInfoFetcher;
import io.airbyte.cdk.integrations.debezium.internals.AirbyteSchemaHistoryStorage.SchemaHistory;
import io.airbyte.cdk.integrations.source.relationaldb.models.CdcState;
import java.util.Optional;

/**
 * Extracts the previously persisted Debezium offset and schema history from a saved
 * {@link CdcState}, tolerating a null or empty state (first sync).
 */
public class MssqlCdcSavedInfoFetcher implements CdcSavedInfoFetcher {

  // Debezium offset saved by the previous sync, or null when none was persisted.
  private final JsonNode savedOffset;
  // Debezium schema history saved by the previous sync, or null when none was persisted.
  private final JsonNode savedSchemaHistory;
  // Whether the persisted schema history blob is compressed.
  private final boolean isSavedSchemaHistoryCompressed;

  public MssqlCdcSavedInfoFetcher(final CdcState savedState) {
    final boolean savedStatePresent = savedState != null && savedState.getState() != null;
    this.savedOffset = savedStatePresent ? savedState.getState().get(MSSQL_CDC_OFFSET) : null;
    this.savedSchemaHistory = savedStatePresent ? savedState.getState().get(MSSQL_DB_HISTORY) : null;
    this.isSavedSchemaHistoryCompressed =
        savedStatePresent && savedState.getState().has(IS_COMPRESSED) && savedState.getState().get(IS_COMPRESSED).asBoolean();
  }

  @Override
  public JsonNode getSavedOffset() {
    return savedOffset;
  }

  @Override
  public SchemaHistory<Optional<JsonNode>> getSavedSchemaHistory() {
    return new SchemaHistory<>(Optional.ofNullable(savedSchemaHistory), isSavedSchemaHistoryCompressed);
  }

}
/*
 * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql;

import static io.airbyte.integrations.source.mssql.MssqlSource.IS_COMPRESSED;
import static io.airbyte.integrations.source.mssql.MssqlSource.MSSQL_CDC_OFFSET;
import static io.airbyte.integrations.source.mssql.MssqlSource.MSSQL_DB_HISTORY;

import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.cdk.integrations.debezium.CdcStateHandler;
import io.airbyte.cdk.integrations.debezium.internals.AirbyteSchemaHistoryStorage.SchemaHistory;
import io.airbyte.cdk.integrations.source.relationaldb.models.CdcState;
import io.airbyte.cdk.integrations.source.relationaldb.state.StateManager;
import io.airbyte.commons.json.Jsons;
import io.airbyte.protocol.models.v0.AirbyteMessage;
import io.airbyte.protocol.models.v0.AirbyteMessage.Type;
import io.airbyte.protocol.models.v0.AirbyteStateMessage;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Persists Debezium offset and schema history into Airbyte CDC state and emits the corresponding
 * STATE messages via the global {@link StateManager}.
 */
public class MssqlCdcStateHandler implements CdcStateHandler {

  private static final Logger LOGGER = LoggerFactory.getLogger(MssqlCdcStateHandler.class);
  private final StateManager stateManager;

  public MssqlCdcStateHandler(final StateManager stateManager) {
    this.stateManager = stateManager;
  }

  @Override
  public boolean isCdcCheckpointEnabled() {
    return true;
  }

  /**
   * Packs the Debezium offset, schema history, and the compression flag into a single JSON CDC
   * state, stores it, and returns the resulting Airbyte STATE message.
   */
  @Override
  public AirbyteMessage saveState(final Map<String, String> offset, final SchemaHistory<String> dbHistory) {
    final Map<String, Object> state = new HashMap<>();
    state.put(MSSQL_CDC_OFFSET, offset);
    state.put(MSSQL_DB_HISTORY, dbHistory.getSchema());
    state.put(IS_COMPRESSED, dbHistory.isCompressed());

    final JsonNode asJson = Jsons.jsonNode(state);
    LOGGER.info("debezium state offset: {}", Jsons.jsonNode(offset));

    final CdcState cdcState = new CdcState().withState(asJson);
    stateManager.getCdcStateManager().setCdcState(cdcState);
    /*
     * Namespace pair is ignored by the global state manager, but is needed to satisfy the API
     * contract. Therefore, provide an empty optional.
     */
    final AirbyteStateMessage stateMessage = stateManager.emit(Optional.empty());
    return new AirbyteMessage().withType(Type.STATE).withState(stateMessage);
  }

  @Override
  public AirbyteMessage saveStateAfterCompletionOfSnapshotOfNewStreams() {
    LOGGER.info("Snapshot of new tables is complete, saving state");
    /*
     * Namespace pair is ignored by the global state manager, but is needed to satisfy the API
     * contract. Therefore, provide an empty optional.
     */
    final AirbyteStateMessage stateMessage = stateManager.emit(Optional.empty());
    return new AirbyteMessage().withType(Type.STATE).withState(stateMessage);
  }

  @Override
  public boolean compressSchemaHistoryForState() {
    return true;
  }

}
/*
 * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql;

import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.base.Preconditions;
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
import io.airbyte.cdk.db.jdbc.JdbcUtils;
import io.airbyte.cdk.integrations.debezium.CdcTargetPosition;
import io.airbyte.cdk.integrations.debezium.internals.ChangeEventWithMetadata;
import io.airbyte.cdk.integrations.debezium.internals.SnapshotMetadata;
import io.airbyte.commons.json.Jsons;
import io.debezium.connector.sqlserver.Lsn;
import java.io.IOException;
import java.sql.SQLException;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * The target SQL Server log sequence number (LSN) at which a CDC sync should stop reading.
 * Debezium events and heartbeats are compared against this position to decide when to close the
 * stream.
 */
public class MssqlCdcTargetPosition implements CdcTargetPosition<Lsn> {

  private static final Logger LOGGER = LoggerFactory.getLogger(MssqlCdcTargetPosition.class);

  public final Lsn targetLsn;

  public MssqlCdcTargetPosition(final Lsn targetLsn) {
    this.targetLsn = targetLsn;
  }

  @Override
  public boolean reachedTargetPosition(final ChangeEventWithMetadata changeEventWithMetadata) {
    if (changeEventWithMetadata.isSnapshotEvent()) {
      // Snapshot events carry no meaningful LSN; keep reading.
      return false;
    } else if (SnapshotMetadata.LAST == changeEventWithMetadata.getSnapshotMetadata()) {
      LOGGER.info("Signalling close because Snapshot is complete");
      return true;
    } else {
      final Lsn recordLsn = extractLsn(changeEventWithMetadata.getEventValueAsJson());
      final boolean isEventLSNAfter = targetLsn.compareTo(recordLsn) <= 0;
      if (isEventLSNAfter) {
        LOGGER.info("Signalling close because record's LSN : " + recordLsn + " is after target LSN : " + targetLsn);
      }
      return isEventLSNAfter;
    }
  }

  @Override
  public Lsn extractPositionFromHeartbeatOffset(final Map<String, ?> sourceOffset) {
    final Object commitLsnValue = sourceOffset.get("commit_lsn");
    return (commitLsnValue == null) ? Lsn.NULL : Lsn.valueOf(commitLsnValue.toString());
  }

  /**
   * Reads the commit LSN from a Debezium event value. Throws IllegalStateException (instead of an
   * accidental NullPointerException) when either "source" or "source.commit_lsn" is absent.
   */
  private Lsn extractLsn(final JsonNode valueAsJson) {
    return Optional.ofNullable(valueAsJson.get("source"))
        // Optional.map yields empty when commit_lsn is missing, so the orElseThrow below fires
        // rather than an NPE from calling asText() on a null node.
        .map(source -> source.get("commit_lsn"))
        .map(JsonNode::asText)
        .map(Lsn::valueOf)
        .orElseThrow(() -> new IllegalStateException("Could not find LSN"));
  }

  @Override
  public boolean equals(final Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    final MssqlCdcTargetPosition that = (MssqlCdcTargetPosition) o;
    return targetLsn.equals(that.targetLsn);
  }

  @Override
  public int hashCode() {
    return targetLsn.hashCode();
  }

  /**
   * Queries the database's current maximum CDC LSN (the high-water mark) and wraps it as the
   * target position for this sync.
   */
  public static MssqlCdcTargetPosition getTargetPosition(final JdbcDatabase database, final String dbName) {
    try {
      // We might have to wait a bit before querying the max_lsn to give the CDC capture job
      // a chance to catch up. This is important in tests, where reads might occur in quick succession
      // which might leave the CT tables (which Debezium consumes) in a stale state.
      final String maxLsnQuery = """
                                 USE [%s];
                                 SELECT sys.fn_cdc_get_max_lsn() AS max_lsn;
                                 """.formatted(dbName);
      // Query the high-water mark.
      final List<JsonNode> jsonNodes = database.bufferedResultSetQuery(
          connection -> connection.createStatement().executeQuery(maxLsnQuery),
          JdbcUtils.getDefaultSourceOperations()::rowToJson);
      Preconditions.checkState(jsonNodes.size() == 1);

      final Lsn maxLsn;
      if (jsonNodes.get(0).get("max_lsn") != null) {
        maxLsn = Lsn.valueOf(jsonNodes.get(0).get("max_lsn").binaryValue());
      } else {
        // fn_cdc_get_max_lsn returns NULL when CDC has not captured anything yet.
        maxLsn = Lsn.NULL;
      }
      LOGGER.info("identified target lsn: " + maxLsn);
      return new MssqlCdcTargetPosition(maxLsn);
    } catch (final SQLException | IOException e) {
      throw new RuntimeException(e);
    }
  }

  @Override
  public boolean isHeartbeatSupported() {
    return true;
  }

  @Override
  public boolean reachedTargetPosition(final Lsn positionFromHeartbeat) {
    return positionFromHeartbeat.compareTo(targetLsn) >= 0;
  }

  @Override
  public boolean isEventAheadOffset(final Map<String, String> offset, final ChangeEventWithMetadata event) {
    if (offset == null || offset.size() != 1) {
      return false;
    }
    final Lsn eventLsn = extractLsn(event.getEventValueAsJson());
    final Lsn offsetLsn = offsetToLsn(offset);
    return eventLsn.compareTo(offsetLsn) > 0;
  }

  @Override
  public boolean isSameOffset(final Map<String, String> offsetA, final Map<String, String> offsetB) {
    if ((offsetA == null || offsetA.size() != 1) || (offsetB == null || offsetB.size() != 1)) {
      return false;
    }
    return offsetToLsn(offsetA).equals(offsetToLsn(offsetB));
  }

  // Offsets are single-entry maps whose value is a JSON document containing "commit_lsn".
  private Lsn offsetToLsn(final Map<String, String> offset) {
    final JsonNode offsetJson = Jsons.deserialize(offset.values().iterator().next());
    final JsonNode commitLsnJson = offsetJson.get("commit_lsn");
    return (commitLsnJson == null || commitLsnJson.isNull()) ? Lsn.NULL : Lsn.valueOf(commitLsnJson.asText());
  }

}
/*
 * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql;

import com.microsoft.sqlserver.jdbc.Geography;
import com.microsoft.sqlserver.jdbc.Geometry;
import com.microsoft.sqlserver.jdbc.SQLServerException;
import io.airbyte.cdk.db.DataTypeUtils;
import io.airbyte.cdk.db.jdbc.DateTimeConverter;
import io.airbyte.cdk.integrations.debezium.internals.DebeziumConverterUtils;
import io.debezium.spi.converter.CustomConverter;
import io.debezium.spi.converter.RelationalColumn;
import java.math.BigDecimal;
import java.sql.Timestamp;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.Base64;
import java.util.Objects;
import java.util.Properties;
import java.util.Set;
import microsoft.sql.DateTimeOffset;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Debezium custom converter translating SQL Server specific column types (datetime variants,
 * datetimeoffset, time, smallmoney, binary, geometry, geography) into Airbyte-friendly string or
 * double representations.
 */
public class MssqlDebeziumConverter implements CustomConverter<SchemaBuilder, RelationalColumn> {

  private static final Logger LOGGER = LoggerFactory.getLogger(MssqlDebeziumConverter.class);

  private static final Set<String> BINARY = Set.of("VARBINARY", "BINARY");
  private static final Set<String> DATETIME_TYPES = Set.of("DATETIME", "DATETIME2", "SMALLDATETIME");
  private static final String DATE = "DATE";
  private static final String DATETIMEOFFSET = "DATETIMEOFFSET";
  private static final String TIME_TYPE = "TIME";
  private static final String SMALLMONEY_TYPE = "SMALLMONEY";
  private static final String GEOMETRY = "GEOMETRY";
  private static final String GEOGRAPHY = "GEOGRAPHY";

  private static final String DATETIME_FORMAT_MICROSECONDS = "yyyy-MM-dd'T'HH:mm:ss[.][SSSSSS]";
  // DateTimeFormatter is immutable and thread-safe; build it once instead of per record.
  private static final DateTimeFormatter DATETIME_FORMATTER = DateTimeFormatter.ofPattern(DATETIME_FORMAT_MICROSECONDS);

  @Override
  public void configure(final Properties props) {}

  @Override
  public void converterFor(final RelationalColumn field,
                           final ConverterRegistration<SchemaBuilder> registration) {
    if (DATE.equalsIgnoreCase(field.typeName())) {
      registerDate(field, registration);
    } else if (DATETIME_TYPES.contains(field.typeName().toUpperCase())) {
      registerDatetime(field, registration);
    } else if (SMALLMONEY_TYPE.equalsIgnoreCase(field.typeName())) {
      registerMoney(field, registration);
    } else if (BINARY.contains(field.typeName().toUpperCase())) {
      registerBinary(field, registration);
    } else if (GEOMETRY.equalsIgnoreCase(field.typeName())) {
      registerGeometry(field, registration);
    } else if (GEOGRAPHY.equalsIgnoreCase(field.typeName())) {
      registerGeography(field, registration);
    } else if (TIME_TYPE.equalsIgnoreCase(field.typeName())) {
      registerTime(field, registration);
    } else if (DATETIMEOFFSET.equalsIgnoreCase(field.typeName())) {
      registerDateTimeOffSet(field, registration);
    }
  }

  /** Renders GEOMETRY values (delivered as WKB byte arrays) in their textual form. */
  private void registerGeometry(final RelationalColumn field,
                                final ConverterRegistration<SchemaBuilder> registration) {
    registration.register(SchemaBuilder.string(), input -> {
      if (Objects.isNull(input)) {
        return DebeziumConverterUtils.convertDefaultValue(field);
      }

      if (input instanceof byte[]) {
        try {
          return Geometry.deserialize((byte[]) input).toString();
        } catch (final SQLServerException e) {
          LOGGER.error(e.getMessage());
        }
      }

      LOGGER.warn("Uncovered Geometry class type '{}'. Use default converter",
          input.getClass().getName());
      return input.toString();
    });
  }

  /** Renders GEOGRAPHY values (delivered as WKB byte arrays) in their textual form. */
  private void registerGeography(final RelationalColumn field,
                                 final ConverterRegistration<SchemaBuilder> registration) {
    registration.register(SchemaBuilder.string(), input -> {
      if (Objects.isNull(input)) {
        return DebeziumConverterUtils.convertDefaultValue(field);
      }

      if (input instanceof byte[]) {
        try {
          return Geography.deserialize((byte[]) input).toString();
        } catch (final SQLServerException e) {
          LOGGER.error(e.getMessage());
        }
      }

      LOGGER.warn("Uncovered Geography class type '{}'. Use default converter",
          input.getClass().getName());
      return input.toString();
    });
  }

  /** Converts DATE columns to ISO date strings. */
  private void registerDate(final RelationalColumn field,
                            final ConverterRegistration<SchemaBuilder> registration) {
    registration.register(SchemaBuilder.string(), input -> {
      if (Objects.isNull(input)) {
        return DebeziumConverterUtils.convertDefaultValue(field);
      }
      if (field.typeName().equalsIgnoreCase("DATE")) {
        return DateTimeConverter.convertToDate(input);
      }
      // NOTE(review): converterFor only routes DATE-typed columns here, so this fallback appears
      // unreachable today; kept for safety.
      return DateTimeConverter.convertToTimestamp(input);
    });
  }

  /** Converts DATETIMEOFFSET columns to ISO-8601 strings with zone offset. */
  private void registerDateTimeOffSet(final RelationalColumn field,
                                      final ConverterRegistration<SchemaBuilder> registration) {
    registration.register(SchemaBuilder.string(), input -> {
      if (Objects.isNull(input)) {
        return DebeziumConverterUtils.convertDefaultValue(field);
      }

      if (input instanceof DateTimeOffset) {
        final var offsetDateTime = ((DateTimeOffset) input).getOffsetDateTime();
        return offsetDateTime.format(DataTypeUtils.TIMESTAMPTZ_FORMATTER);
      }

      LOGGER.warn("Uncovered DateTimeOffSet class type '{}'. Use default converter",
          input.getClass().getName());
      return input.toString();
    });
  }

  /** Converts DATETIME/DATETIME2/SMALLDATETIME columns to microsecond-precision strings. */
  private void registerDatetime(final RelationalColumn field,
                                final ConverterRegistration<SchemaBuilder> registration) {
    registration.register(SchemaBuilder.string(),
        input -> {
          if (Objects.isNull(input)) {
            return DebeziumConverterUtils.convertDefaultValue(field);
          }
          if (input instanceof final Timestamp d) {
            final LocalDateTime localDateTime = d.toLocalDateTime();
            return localDateTime.format(DATETIME_FORMATTER);
          }

          if (input instanceof final Long d) {
            // During schema history creation datetime input arrives in the form of epoch nanosecond
            // This is needed for example for a column defined as:
            // [TransactionDate] DATETIME2 (7) DEFAULT ('2024-01-01T00:00:00.0000000') NOT NULL
            final Instant instant = Instant.ofEpochMilli(d / 1000 / 1000);
            final LocalDateTime localDateTime = LocalDateTime.ofInstant(instant, ZoneId.of("UTC"));
            return localDateTime.format(DATETIME_FORMATTER);
          }

          return input.toString();
        });
  }

  /** Converts TIME columns to ISO time strings. */
  private void registerTime(final RelationalColumn field,
                            final ConverterRegistration<SchemaBuilder> registration) {
    registration.register(SchemaBuilder.string(), input -> {
      if (Objects.isNull(input)) {
        return DebeziumConverterUtils.convertDefaultValue(field);
      }

      if (input instanceof Timestamp) {
        return DataTypeUtils.toISOTimeString(((Timestamp) input).toLocalDateTime());
      }

      LOGGER.warn("Uncovered time class type '{}'. Use default converter",
          input.getClass().getName());
      return input.toString();
    });
  }

  /** Converts SMALLMONEY columns (BigDecimal) to double. */
  private void registerMoney(final RelationalColumn field,
                             final ConverterRegistration<SchemaBuilder> registration) {
    registration.register(SchemaBuilder.float64(), input -> {
      if (Objects.isNull(input)) {
        return DebeziumConverterUtils.convertDefaultValue(field);
      }

      if (input instanceof BigDecimal) {
        return ((BigDecimal) input).doubleValue();
      }

      LOGGER.warn("Uncovered money class type '{}'. Use default converter",
          input.getClass().getName());
      return input.toString();
    });
  }

  /** Converts BINARY/VARBINARY columns to base64 strings. */
  private void registerBinary(final RelationalColumn field,
                              final ConverterRegistration<SchemaBuilder> registration) {
    registration.register(SchemaBuilder.string(), input -> {
      if (Objects.isNull(input)) {
        return DebeziumConverterUtils.convertDefaultValue(field);
      }

      if (input instanceof byte[]) {
        return Base64.getEncoder().encodeToString((byte[]) input);
      }

      LOGGER.warn("Uncovered binary class type '{}'. Use default converter",
          input.getClass().getName());
      return input.toString();
    });
  }

}
+ */ +package io.airbyte.integrations.source.mssql; + +import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils.enquoteIdentifierList; +import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils.getFullyQualifiedTableNameWithQuoting; +import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils.getIdentifierWithQuoting; +import static io.airbyte.integrations.source.mssql.MssqlSource.HIERARCHYID; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.microsoft.sqlserver.jdbc.SQLServerResultSetMetaData; +import io.airbyte.cdk.db.jdbc.JdbcDatabase; +import io.airbyte.cdk.integrations.source.relationaldb.CursorInfo; +import io.airbyte.cdk.integrations.source.relationaldb.models.CursorBasedStatus; +import io.airbyte.cdk.integrations.source.relationaldb.models.InternalModels.StateType; +import io.airbyte.cdk.integrations.source.relationaldb.state.StateManager; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; +import java.math.BigDecimal; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Utility class to define constants related to querying mssql + */ +public class MssqlQueryUtils { + + private static final Logger LOGGER = LoggerFactory.getLogger(MssqlQueryUtils.class); + private static final String MAX_OC_VALUE_QUERY = + """ + SELECT MAX(%s) as %s FROM %s; + """; + + public 
record TableSizeInfo(Long tableSize, Long avgRowLength) {} + + private static final String MAX_CURSOR_VALUE_QUERY = + """ + SELECT TOP 1 %s, COUNT(*) AS %s FROM %s WHERE %s = (SELECT MAX(%s) FROM %s) GROUP BY %s; + """; + public static final String INDEX_QUERY = "EXEC sp_helpindex N'%s'"; + + public record Index( + @JsonProperty("index_name") String name, + @JsonProperty("index_description") String description, + @JsonProperty("index_keys") String keys) {} + + public static final String TABLE_ESTIMATE_QUERY = + """ + EXEC sp_spaceused N'"%s"."%s"' + """; + + public static final String MAX_OC_COL = "max_oc"; + public static final String DATA_SIZE_HUMAN_READABLE = "data"; + public static final String NUM_ROWS = "rows"; + + public static void getIndexInfoForStreams(final JdbcDatabase database, final ConfiguredAirbyteCatalog catalog, final String quoteString) { + for (final ConfiguredAirbyteStream stream : catalog.getStreams()) { + final String streamName = stream.getStream().getName(); + final String schemaName = stream.getStream().getNamespace(); + final String fullTableName = getFullyQualifiedTableNameWithQuoting(schemaName, streamName, quoteString); + LOGGER.info("Discovering indexes for table {}", fullTableName); + try { + final String query = INDEX_QUERY.formatted(fullTableName); + LOGGER.debug("Index lookup query: {}", query); + final List jsonNodes = database.bufferedResultSetQuery(conn -> conn.prepareStatement(query).executeQuery(), + resultSet -> new MssqlSourceOperations().rowToJson(resultSet)); + if (jsonNodes != null) { + jsonNodes.stream().map(node -> Jsons.convertValue(node, Index.class)) + .forEach(i -> LOGGER.info("Index {}", i)); + } + } catch (final Exception ex) { + LOGGER.info("Failed to get index for {}", fullTableName); + } + } + + } + + public static String getMaxOcValueForStream(final JdbcDatabase database, + final ConfiguredAirbyteStream stream, + final String ocFieldName, + final String quoteString) { + final String name = 
stream.getStream().getName(); + final String namespace = stream.getStream().getNamespace(); + final String fullTableName = + getFullyQualifiedTableNameWithQuoting(namespace, name, quoteString); + final String maxOcQuery = String.format(MAX_OC_VALUE_QUERY, + getIdentifierWithQuoting(ocFieldName, quoteString), + MAX_OC_COL, + fullTableName); + LOGGER.info("Querying for max oc value: {}", maxOcQuery); + try { + final List jsonNodes = database.bufferedResultSetQuery(conn -> conn.prepareStatement(maxOcQuery).executeQuery(), + resultSet -> new MssqlSourceOperations().rowToJson(resultSet)); + Preconditions.checkState(jsonNodes.size() == 1); + if (jsonNodes.get(0).get(MAX_OC_COL) == null) { + LOGGER.info("Max PK is null for table {} - this could indicate an empty table", fullTableName); + return null; + } + return jsonNodes.get(0).get(MAX_OC_COL).asText(); + } catch (final SQLException e) { + throw new RuntimeException(e); + } + } + + private static long toBytes(final String filesize) { + long returnValue = -1; + final Pattern patt = Pattern.compile("([\\d.]+)[\s+]*([GMK]B)", Pattern.CASE_INSENSITIVE); + final Matcher matcher = patt.matcher(filesize); + Map powerMap = new HashMap(); + powerMap.put("GB", 3); + powerMap.put("MB", 2); + powerMap.put("KB", 1); + if (matcher.find()) { + String number = matcher.group(1).trim(); + int pow = powerMap.get(matcher.group(2).toUpperCase()); + BigDecimal bytes = new BigDecimal(number); + bytes = bytes.multiply(BigDecimal.valueOf(1024).pow(pow)); + returnValue = bytes.longValue(); + } + return returnValue; + } + + public static Map getTableSizeInfoForStreams(final JdbcDatabase database, + final List streams, + final String quoteString) { + final Map tableSizeInfoMap = new HashMap<>(); + streams.forEach(stream -> { + try { + final String name = stream.getStream().getName(); + final String namespace = stream.getStream().getNamespace(); + final String fullTableName = + getFullyQualifiedTableNameWithQuoting(name, namespace, quoteString); + 
final List tableEstimateResult = getTableEstimate(database, namespace, name); + + if (tableEstimateResult != null + && tableEstimateResult.size() == 1 + && tableEstimateResult.get(0).get(DATA_SIZE_HUMAN_READABLE) != null + && tableEstimateResult.get(0).get(NUM_ROWS) != null) { + final long tableEstimateBytes = toBytes(tableEstimateResult.get(0).get(DATA_SIZE_HUMAN_READABLE).asText()); + final long numRows = tableEstimateResult.get(0).get(NUM_ROWS).asLong(); + final long avgTableRowSizeBytes = numRows > 0 ? tableEstimateBytes / numRows : 0; + LOGGER.info("Stream {} size estimate is {}, average row size estimate is {}", fullTableName, tableEstimateBytes, avgTableRowSizeBytes); + final TableSizeInfo tableSizeInfo = new TableSizeInfo(tableEstimateBytes, avgTableRowSizeBytes); + final AirbyteStreamNameNamespacePair namespacePair = + new AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace()); + tableSizeInfoMap.put(namespacePair, tableSizeInfo); + } + } catch (final Exception e) { + LOGGER.warn("Error occurred while attempting to estimate sync size", e); + } + }); + return tableSizeInfoMap; + } + + /** + * Iterates through each stream and find the max cursor value and the record count which has that + * value based on each cursor field provided by the customer per stream This information is saved in + * a Hashmap with the mapping being the AirbyteStreamNameNamespacepair -> CursorBasedStatus + * + * @param database the source db + * @param streams streams to be synced + * @param stateManager stream stateManager + * @return Map of streams to statuses + */ + public static Map getCursorBasedSyncStatusForStreams(final JdbcDatabase database, + final List streams, + final StateManager stateManager, + final String quoteString) { + + final Map cursorBasedStatusMap = new HashMap<>(); + streams.forEach(stream -> { + final String name = stream.getStream().getName(); + final String namespace = stream.getStream().getNamespace(); + final String 
fullTableName = + getFullyQualifiedTableNameWithQuoting(namespace, name, quoteString); + + final Optional cursorInfoOptional = + stateManager.getCursorInfo(new AirbyteStreamNameNamespacePair(name, namespace)); + if (cursorInfoOptional.isEmpty()) { + throw new RuntimeException(String.format("Stream %s was not provided with an appropriate cursor", stream.getStream().getName())); + } + final CursorBasedStatus cursorBasedStatus = new CursorBasedStatus(); + final Optional maybeCursorField = Optional.ofNullable(cursorInfoOptional.get().getCursorField()); + maybeCursorField.ifPresent(cursorField -> { + LOGGER.info("Cursor {}. Querying max cursor value for {}.{}", cursorField, namespace, name); + final String quotedCursorField = getIdentifierWithQuoting(cursorField, quoteString); + final String counterField = cursorField + "_count"; + final String quotedCounterField = getIdentifierWithQuoting(counterField, quoteString); + final String cursorBasedSyncStatusQuery = String.format(MAX_CURSOR_VALUE_QUERY, + quotedCursorField, + quotedCounterField, + fullTableName, + quotedCursorField, + quotedCursorField, + fullTableName, + quotedCursorField); + final List jsonNodes; + try { + jsonNodes = database.bufferedResultSetQuery(conn -> conn.prepareStatement(cursorBasedSyncStatusQuery).executeQuery(), + resultSet -> new MssqlSourceOperations().rowToJson(resultSet)); + } catch (SQLException e) { + throw new RuntimeException("Failed to read max cursor value from %s.%s".formatted(namespace, name), e); + } + cursorBasedStatus.setCursorField(ImmutableList.of(cursorField)); + if (!jsonNodes.isEmpty()) { + final JsonNode result = jsonNodes.get(0); + cursorBasedStatus.setCursor(result.get(cursorField).asText()); + cursorBasedStatus.setCursorRecordCount(result.get(counterField).asLong()); + } + cursorBasedStatus.setStateType(StateType.CURSOR_BASED); + cursorBasedStatus.setVersion(2L); + cursorBasedStatus.setStreamName(name); + cursorBasedStatus.setStreamNamespace(namespace); + 
cursorBasedStatusMap.put(new AirbyteStreamNameNamespacePair(name, namespace), cursorBasedStatus); + }); + }); + + return cursorBasedStatusMap; + } + + private static List getTableEstimate(final JdbcDatabase database, final String namespace, final String name) + throws SQLException { + // Construct the table estimate query. + final String tableEstimateQuery = + String.format(TABLE_ESTIMATE_QUERY, namespace, name); + LOGGER.info("Querying for table estimate size: {}", tableEstimateQuery); + final List jsonNodes = database.bufferedResultSetQuery(conn -> conn.createStatement().executeQuery(tableEstimateQuery), + resultSet -> new MssqlSourceOperations().rowToJson(resultSet)); + Preconditions.checkState(jsonNodes.size() == 1); + LOGGER.debug("Estimate: {}", jsonNodes); + return jsonNodes; + } + + public static String prettyPrintConfiguredAirbyteStreamList(final List streamList) { + return streamList.stream().map(s -> "%s.%s".formatted(s.getStream().getNamespace(), s.getStream().getName())).collect(Collectors.joining(", ")); + } + + /** + * There is no support for hierarchyid even in the native SQL Server JDBC driver. Its value can be + * converted to a nvarchar(4000) data type by calling the ToString() method. So we make a separate + * query to get Table's MetaData, check is there any hierarchyid columns, and wrap required fields + * with the ToString() function in the final Select query. Reference: + * https://docs.microsoft.com/en-us/sql/t-sql/data-types/hierarchyid-data-type-method-reference?view=sql-server-ver15#data-type-conversion + * Note: This is where the main logic for the same method in MssqlSource. 
Extracted logic in order + * to be used in MssqlInitialLoadRecordIterator + * + * @return the list with Column names updated to handle functions (if nay) properly + */ + public static String getWrappedColumnNames( + final JdbcDatabase database, + final String quoteString, + final List columnNames, + final String schemaName, + final String tableName) { + final List hierarchyIdColumns = new ArrayList<>(); + try { + final String identifierQuoteString = database.getMetaData().getIdentifierQuoteString(); + final SQLServerResultSetMetaData sqlServerResultSetMetaData = (SQLServerResultSetMetaData) database + .queryMetadata(String + .format("SELECT TOP 1 %s FROM %s", // only first row is enough to get field's type + enquoteIdentifierList(columnNames, quoteString), + getFullyQualifiedTableNameWithQuoting(schemaName, tableName, quoteString))); + + // metadata will be null if table doesn't contain records + if (sqlServerResultSetMetaData != null) { + for (int i = 1; i <= sqlServerResultSetMetaData.getColumnCount(); i++) { + if (HIERARCHYID.equals(sqlServerResultSetMetaData.getColumnTypeName(i))) { + hierarchyIdColumns.add(sqlServerResultSetMetaData.getColumnName(i)); + } + } + } + + // iterate through names and replace Hierarchyid field for query is with toString() function + // Eventually would get columns like this: testColumn.toString as "testColumn" + // toString function in SQL server is the only way to get human-readable value, but not mssql + // specific HEX value + return String.join(", ", columnNames.stream() + .map( + el -> hierarchyIdColumns.contains(el) ? 
String.format("%s.ToString() as %s%s%s", el, identifierQuoteString, el, identifierQuoteString) + : getIdentifierWithQuoting(el, quoteString)) + .toList()); + } catch (final SQLException e) { + LOGGER.error("Failed to fetch metadata to prepare a proper request.", e); + throw new RuntimeException(e); + } + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java b/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java new file mode 100644 index 0000000000000..de2520b069d9f --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java @@ -0,0 +1,652 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql; + +import static io.airbyte.cdk.integrations.debezium.AirbyteDebeziumHandler.isAnyStreamIncrementalSyncMode; +import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter.CDC_DELETED_AT; +import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter.CDC_UPDATED_AT; +import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils.*; +import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbReadUtil.identifyStreamsForCursorBased; +import static io.airbyte.integrations.source.mssql.MssqlCdcHelper.isCdc; +import static io.airbyte.integrations.source.mssql.MssqlQueryUtils.getCursorBasedSyncStatusForStreams; +import static io.airbyte.integrations.source.mssql.MssqlQueryUtils.getTableSizeInfoForStreams; +import static io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil.*; +import static java.util.stream.Collectors.toList; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.common.base.Preconditions; +import 
com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import io.airbyte.cdk.db.factory.DatabaseDriver; +import io.airbyte.cdk.db.jdbc.JdbcDatabase; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.db.jdbc.streaming.AdaptiveStreamingQueryConfig; +import io.airbyte.cdk.db.util.SSLCertificateUtils; +import io.airbyte.cdk.integrations.base.IntegrationRunner; +import io.airbyte.cdk.integrations.base.Source; +import io.airbyte.cdk.integrations.base.adaptive.AdaptiveSourceRunner; +import io.airbyte.cdk.integrations.base.ssh.SshWrappedSource; +import io.airbyte.cdk.integrations.debezium.internals.*; +import io.airbyte.cdk.integrations.source.jdbc.AbstractJdbcSource; +import io.airbyte.cdk.integrations.source.relationaldb.InitialLoadHandler; +import io.airbyte.cdk.integrations.source.relationaldb.TableInfo; +import io.airbyte.cdk.integrations.source.relationaldb.models.CursorBasedStatus; +import io.airbyte.cdk.integrations.source.relationaldb.state.NonResumableStateMessageProducer; +import io.airbyte.cdk.integrations.source.relationaldb.state.SourceStateMessageProducer; +import io.airbyte.cdk.integrations.source.relationaldb.state.StateGeneratorUtils; +import io.airbyte.cdk.integrations.source.relationaldb.state.StateManager; +import io.airbyte.cdk.integrations.source.relationaldb.state.StateManagerFactory; +import io.airbyte.cdk.integrations.source.relationaldb.streamstatus.StreamStatusTraceEmitterIterator; +import io.airbyte.commons.exceptions.ConfigErrorException; +import io.airbyte.commons.functional.CheckedConsumer; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.stream.AirbyteStreamStatusHolder; +import io.airbyte.commons.util.AutoCloseableIterator; +import io.airbyte.commons.util.AutoCloseableIterators; +import io.airbyte.integrations.source.mssql.cursor_based.MssqlCursorBasedStateManager; +import 
io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadHandler; +import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadStateManager; +import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadStreamStateManager; +import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil; +import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil.CursorBasedStreams; +import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil.InitialLoadStreams; +import io.airbyte.protocol.models.CommonField; +import io.airbyte.protocol.models.v0.*; +import io.airbyte.protocol.models.v0.AirbyteStateMessage.AirbyteStateType; +import java.io.IOException; +import java.net.URI; +import java.security.KeyStoreException; +import java.security.NoSuchAlgorithmException; +import java.security.cert.CertificateException; +import java.sql.*; +import java.time.Duration; +import java.time.Instant; +import java.util.*; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.commons.lang3.RandomStringUtils; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class MssqlSource extends AbstractJdbcSource implements Source { + + private static final Logger LOGGER = LoggerFactory.getLogger(MssqlSource.class); + public static final String DESCRIBE_TABLE_QUERY = + """ + sp_columns "%s" + """; + public static final String NULL_CURSOR_VALUE_WITH_SCHEMA_QUERY = + """ + SELECT CASE WHEN (SELECT TOP 1 1 FROM "%s"."%s" WHERE "%s" IS NULL)=1 then 1 else 0 end as %s + """; + public static final String DRIVER_CLASS = DatabaseDriver.MSSQLSERVER.getDriverClassName(); + public static final String MSSQL_CDC_OFFSET = "mssql_cdc_offset"; + public static final String MSSQL_DB_HISTORY = "mssql_db_history"; + public static final String IS_COMPRESSED = "is_compressed"; + 
public static final String CDC_LSN = "_ab_cdc_lsn"; + public static final String CDC_EVENT_SERIAL_NO = "_ab_cdc_event_serial_no"; + public static final String HIERARCHYID = "hierarchyid"; + private static final int INTERMEDIATE_STATE_EMISSION_FREQUENCY = 10_000; + public static final String CDC_DEFAULT_CURSOR = "_ab_cdc_cursor"; + public static final String TUNNEL_METHOD = "tunnel_method"; + public static final String NO_TUNNEL = "NO_TUNNEL"; + public static final String SSL_METHOD = "ssl_method"; + public static final String SSL_METHOD_UNENCRYPTED = "unencrypted"; + private MssqlInitialLoadStateManager initialLoadStateManager = null; + public static final String JDBC_DELIMITER = ";"; + private List schemas; + + public static Source sshWrappedSource(final MssqlSource source) { + return new SshWrappedSource(source, JdbcUtils.HOST_LIST_KEY, JdbcUtils.PORT_LIST_KEY); + } + + public MssqlSource() { + super(DRIVER_CLASS, AdaptiveStreamingQueryConfig::new, new MssqlSourceOperations()); + } + + @Override + protected AirbyteStateType getSupportedStateType(final JsonNode config) { + return MssqlCdcHelper.isCdc(config) ? AirbyteStateType.GLOBAL : AirbyteStateType.STREAM; + } + + @Override + public AirbyteConnectionStatus check(final JsonNode config) throws Exception { + // #15808 Disallow connecting to db with disable, prefer or allow SSL mode when connecting directly + // and not over SSH tunnel + if (cloudDeploymentMode()) { + if (config.has(TUNNEL_METHOD) + && config.get(TUNNEL_METHOD).has(TUNNEL_METHOD) + && config.get(TUNNEL_METHOD).get(TUNNEL_METHOD).asText().equals(NO_TUNNEL)) { + // If no SSH tunnel. + if (config.has(SSL_METHOD) && config.get(SSL_METHOD).has(SSL_METHOD) && + SSL_METHOD_UNENCRYPTED.equalsIgnoreCase(config.get(SSL_METHOD).get(SSL_METHOD).asText())) { + // Fail in case SSL method is unencrypted. + return new AirbyteConnectionStatus() + .withStatus(AirbyteConnectionStatus.Status.FAILED) + .withMessage("Unsecured connection not allowed. 
" + + "If no SSH Tunnel set up, please use one of the following SSL methods: " + + "encrypted_trust_server_certificate, encrypted_verify_certificate."); + } + } + } + return super.check(config); + } + + /** + * See {@link MssqlQueryUtils#getWrappedColumnNames} + */ + @Override + protected String getWrappedColumnNames(final JdbcDatabase database, + final Connection connection, + final List columnNames, + final String schemaName, + final String tableName) { + return MssqlQueryUtils.getWrappedColumnNames(database, getQuoteString(), columnNames, schemaName, tableName); + } + + @Override + public JsonNode toDatabaseConfig(final JsonNode mssqlConfig) { + final List additionalParameters = new ArrayList<>(); + + final StringBuilder jdbcUrl = new StringBuilder( + String.format("jdbc:sqlserver://%s:%s;databaseName=%s;", + mssqlConfig.get(JdbcUtils.HOST_KEY).asText(), + mssqlConfig.get(JdbcUtils.PORT_KEY).asText(), + mssqlConfig.get(JdbcUtils.DATABASE_KEY).asText())); + + if (mssqlConfig.has("schemas") && mssqlConfig.get("schemas").isArray()) { + schemas = new ArrayList<>(); + for (final JsonNode schema : mssqlConfig.get("schemas")) { + schemas.add(schema.asText()); + } + } + + if (mssqlConfig.has("ssl_method")) { + readSsl(mssqlConfig, additionalParameters); + } else { + additionalParameters.add("trustServerCertificate=true"); + } + + if (!additionalParameters.isEmpty()) { + jdbcUrl.append(String.join(";", additionalParameters)); + } + + final ImmutableMap.Builder configBuilder = ImmutableMap.builder() + .put(JdbcUtils.USERNAME_KEY, mssqlConfig.get(JdbcUtils.USERNAME_KEY).asText()) + .put(JdbcUtils.PASSWORD_KEY, mssqlConfig.get(JdbcUtils.PASSWORD_KEY).asText()) + .put(JdbcUtils.JDBC_URL_KEY, jdbcUrl.toString()); + + if (mssqlConfig.has(JdbcUtils.JDBC_URL_PARAMS_KEY)) { + configBuilder.put(JdbcUtils.CONNECTION_PROPERTIES_KEY, mssqlConfig.get(JdbcUtils.JDBC_URL_PARAMS_KEY)); + } + + final Map additionalParams = new HashMap<>(); + additionalParameters.forEach(param -> { + final 
int i = param.indexOf('='); + additionalParams.put(param.substring(0, i), param.substring(i + 1)); + }); + + configBuilder.putAll(additionalParams); + + return Jsons.jsonNode(configBuilder.build()); + } + + @Override + public Set getExcludedInternalNameSpaces() { + return Set.of( + "INFORMATION_SCHEMA", + "sys", + "spt_fallback_db", + "spt_monitor", + "spt_values", + "spt_fallback_usg", + "MSreplication_options", + "spt_fallback_dev", + "cdc"); // is this actually ok? what if the user wants cdc schema for some reason? + } + + @Override + public AirbyteCatalog discover(final JsonNode config) { + final AirbyteCatalog catalog = super.discover(config); + + if (MssqlCdcHelper.isCdc(config)) { + final List streams = catalog.getStreams().stream() + .map(MssqlSource::overrideSyncModes) + .map(MssqlSource::removeIncrementalWithoutPk) + .map(MssqlSource::setIncrementalToSourceDefined) + .map(MssqlSource::setDefaultCursorFieldForCdc) + .map(MssqlSource::addCdcMetadataColumns) + .collect(toList()); + + catalog.setStreams(streams); + } + + return catalog; + } + + @Override + public List>> discoverInternal(final JdbcDatabase database) throws Exception { + if (schemas != null && !schemas.isEmpty()) { + return schemas.stream().flatMap(schema -> { + LOGGER.info("Get columns for schema: {}", schema); + try { + return super.discoverInternal(database, schema).stream(); + } catch (Exception e) { + throw new ConfigErrorException(String.format("Error getting columns for schema: %s", schema), e); + } + }).collect(toList()); + } else { + LOGGER.info("No schemas explicitly set on UI to process, so will process all of existing schemas in DB"); + return super.discoverInternal(database); + } + } + + @Override + protected boolean verifyCursorColumnValues(final JdbcDatabase database, final String schema, final String tableName, final String columnName) + throws SQLException { + + boolean nullValExist = false; + final String resultColName = "nullValue"; + final String descQuery = 
String.format(DESCRIBE_TABLE_QUERY, tableName); + final Optional field = database.bufferedResultSetQuery(conn -> conn.createStatement() + .executeQuery(descQuery), + resultSet -> JdbcUtils.getDefaultSourceOperations().rowToJson(resultSet)) + .stream() + .peek(x -> LOGGER.info("MsSQL Table Structure {}, {}, {}", x.toString(), schema, tableName)) + .filter(x -> x.get("TABLE_OWNER") != null) + .filter(x -> x.get("COLUMN_NAME") != null) + .filter(x -> x.get("TABLE_OWNER").asText().equals(schema)) + .filter(x -> x.get("COLUMN_NAME").asText().equalsIgnoreCase(columnName)) + .findFirst(); + if (field.isPresent()) { + final JsonNode jsonNode = field.get(); + final JsonNode isNullable = jsonNode.get("IS_NULLABLE"); + if (isNullable != null) { + if (isNullable.asText().equalsIgnoreCase("YES")) { + final String query = String.format(NULL_CURSOR_VALUE_WITH_SCHEMA_QUERY, + schema, tableName, columnName, resultColName); + + LOGGER.debug("null value query: {}", query); + final List jsonNodes = database.bufferedResultSetQuery(conn -> conn.createStatement().executeQuery(query), + resultSet -> JdbcUtils.getDefaultSourceOperations().rowToJson(resultSet)); + Preconditions.checkState(jsonNodes.size() == 1); + nullValExist = jsonNodes.get(0).get(resultColName).booleanValue(); + LOGGER.info("null cursor value for MsSQL source : {}, shema {} , tableName {}, columnName {} ", nullValExist, schema, tableName, + columnName); + } + } + } + // return !nullValExist; + // will enable after we have sent comms to users this affects + return true; + } + + @Override + public List> getCheckOperations(final JsonNode config) + throws Exception { + final List> checkOperations = new ArrayList<>( + super.getCheckOperations(config)); + + if (MssqlCdcHelper.isCdc(config)) { + checkOperations.add(database -> assertCdcEnabledInDb(config, database)); + checkOperations.add(database -> assertCdcSchemaQueryable(config, database)); + checkOperations.add(database -> assertSqlServerAgentRunning(database)); + } + + 
return checkOperations; + } + + protected void assertCdcEnabledInDb(final JsonNode config, final JdbcDatabase database) + throws SQLException { + final List queryResponse = database.queryJsons(connection -> { + final String sql = "SELECT name, is_cdc_enabled FROM sys.databases WHERE name = ?"; + final PreparedStatement ps = connection.prepareStatement(sql); + ps.setString(1, config.get(JdbcUtils.DATABASE_KEY).asText()); + LOGGER.info(String.format("Checking that cdc is enabled on database '%s' using the query: '%s'", + config.get(JdbcUtils.DATABASE_KEY).asText(), sql)); + return ps; + }, sourceOperations::rowToJson); + + if (queryResponse.size() < 1) { + throw new RuntimeException(String.format( + "Couldn't find '%s' in sys.databases table. Please check the spelling and that the user has relevant permissions (see docs).", + config.get(JdbcUtils.DATABASE_KEY).asText())); + } + if (!(queryResponse.get(0).get("is_cdc_enabled").asBoolean())) { + throw new RuntimeException(String.format( + "Detected that CDC is not enabled for database '%s'. Please check the documentation on how to enable CDC on MS SQL Server.", + config.get(JdbcUtils.DATABASE_KEY).asText())); + } + } + + protected void assertCdcSchemaQueryable(final JsonNode config, final JdbcDatabase database) + throws SQLException { + final List queryResponse = database.queryJsons(connection -> { + boolean isAzureSQL = false; + + try (final Statement stmt = connection.createStatement(); + final ResultSet editionRS = stmt.executeQuery("SELECT ServerProperty('Edition')")) { + isAzureSQL = editionRS.next() && "SQL Azure".equals(editionRS.getString(1)); + } + + // Azure SQL does not support USE clause + final String sql = + isAzureSQL ? 
"SELECT * FROM cdc.change_tables" + : "USE [" + config.get(JdbcUtils.DATABASE_KEY).asText() + "]; SELECT * FROM cdc.change_tables"; + final PreparedStatement ps = connection.prepareStatement(sql); + LOGGER.info(String.format( + "Checking user '%s' can query the cdc schema and that we have at least 1 cdc enabled table using the query: '%s'", + config.get(JdbcUtils.USERNAME_KEY).asText(), sql)); + return ps; + }, sourceOperations::rowToJson); + + // Ensure at least one available CDC table + if (queryResponse.size() < 1) { + throw new RuntimeException( + "No cdc-enabled tables found. Please check the documentation on how to enable CDC on MS SQL Server."); + } + } + + // todo: ensure this works for Azure managed SQL (since it uses different sql server agent) + protected void assertSqlServerAgentRunning(final JdbcDatabase database) throws SQLException { + try { + // EngineEdition property values can be found at + // https://learn.microsoft.com/en-us/sql/t-sql/functions/serverproperty-transact-sql?view=sql-server-ver16 + // SQL Server Agent is always running on SQL Managed Instance: + // https://learn.microsoft.com/en-us/azure/azure-sql/managed-instance/transact-sql-tsql-differences-sql-server?view=azuresql#sql-server-agent + final Integer engineEdition = database.queryInt("SELECT ServerProperty('EngineEdition')"); + if (engineEdition == 8) { + LOGGER.info(String.format("SQL Server Agent is assumed to be running when EngineEdition == '%s'", engineEdition)); + } else { + final List queryResponse = database.queryJsons(connection -> { + final String sql = + "SELECT status_desc FROM sys.dm_server_services WHERE [servicename] LIKE 'SQL Server Agent%' OR [servicename] LIKE 'SQL Server 代理%' "; + final PreparedStatement ps = connection.prepareStatement(sql); + LOGGER.info(String.format("Checking that the SQL Server Agent is running using the query: '%s'", sql)); + return ps; + }, sourceOperations::rowToJson); + + if 
(!(queryResponse.get(0).get("status_desc").toString().contains("Running"))) { + throw new RuntimeException(String.format( + "The SQL Server Agent is not running. Current state: '%s'. Please check the documentation on ensuring SQL Server Agent is running.", + queryResponse.get(0).get("status_desc").toString())); + } + } + } catch (final Exception e) { + if (e.getCause() != null && e.getCause().getClass().equals(com.microsoft.sqlserver.jdbc.SQLServerException.class)) { + LOGGER.warn(String.format( + "Skipping check for whether the SQL Server Agent is running, SQLServerException thrown: '%s'", + e.getMessage())); + } else { + throw e; + } + } + } + + @Override + public @NotNull List> getIncrementalIterators(final JdbcDatabase database, + final @NotNull ConfiguredAirbyteCatalog catalog, + final @NotNull Map>> tableNameToTable, + final StateManager stateManager, + final @NotNull Instant emittedAt) { + final JsonNode sourceConfig = database.getSourceConfig(); + if (MssqlCdcHelper.isCdc(sourceConfig) && isAnyStreamIncrementalSyncMode(catalog)) { + LOGGER.info("using OC + CDC"); + return MssqlInitialReadUtil.getCdcReadIterators(database, catalog, tableNameToTable, stateManager, initialLoadStateManager, emittedAt, + getQuoteString()); + } else { + if (isAnyStreamIncrementalSyncMode(catalog)) { + LOGGER.info("Syncing via Primary Key"); + final MssqlCursorBasedStateManager cursorBasedStateManager = new MssqlCursorBasedStateManager(stateManager.getRawStateMessages(), catalog); + final InitialLoadStreams initialLoadStreams = + filterStreamInIncrementalMode(streamsForInitialOrderedColumnLoad(cursorBasedStateManager, catalog)); + final Map pairToCursorBasedStatus = + getCursorBasedSyncStatusForStreams(database, initialLoadStreams.streamsForInitialLoad(), stateManager, getQuoteString()); + final CursorBasedStreams cursorBasedStreams = + new CursorBasedStreams(identifyStreamsForCursorBased(catalog, initialLoadStreams.streamsForInitialLoad()), pairToCursorBasedStatus); + + 
logStreamSyncStatus(initialLoadStreams.streamsForInitialLoad(), "Primary Key"); + logStreamSyncStatus(cursorBasedStreams.streamsForCursorBased(), "Cursor"); + + final MssqlInitialLoadHandler initialLoadHandler = + new MssqlInitialLoadHandler(sourceConfig, database, new MssqlSourceOperations(), getQuoteString(), initialLoadStateManager, + Optional.of(namespacePair -> Jsons.jsonNode(pairToCursorBasedStatus.get(namespacePair))), + getTableSizeInfoForStreams(database, initialLoadStreams.streamsForInitialLoad(), getQuoteString())); + // Cursor based incremental iterators are decorated with start and complete status traces + final List> initialLoadIterator = new ArrayList<>(initialLoadHandler.getIncrementalIterators( + new ConfiguredAirbyteCatalog().withStreams(initialLoadStreams.streamsForInitialLoad()), + tableNameToTable, + emittedAt, true, true, Optional.empty())); + + // Build Cursor based iterator + final List> cursorBasedIterator = + new ArrayList<>(super.getIncrementalIterators(database, + new ConfiguredAirbyteCatalog().withStreams( + cursorBasedStreams.streamsForCursorBased()), + tableNameToTable, + cursorBasedStateManager, emittedAt)); + + return Stream.of(initialLoadIterator, cursorBasedIterator).flatMap(Collection::stream).collect(Collectors.toList()); + + } + } + + LOGGER.info("using CDC: {}", false); + return super.getIncrementalIterators(database, catalog, tableNameToTable, stateManager, emittedAt); + } + + @Override + protected int getStateEmissionFrequency() { + return INTERMEDIATE_STATE_EMISSION_FREQUENCY; + } + + @Override + protected void checkUserHasPrivileges(JsonNode config, JdbcDatabase database) {} + + private static AirbyteStream overrideSyncModes(final AirbyteStream stream) { + return stream.withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)); + } + + // Note: in place mutation. 
+ private static AirbyteStream removeIncrementalWithoutPk(final AirbyteStream stream) { + if (stream.getSourceDefinedPrimaryKey().isEmpty()) { + stream.getSupportedSyncModes().remove(SyncMode.INCREMENTAL); + } + + return stream; + } + + // Note: in place mutation. + private static AirbyteStream setIncrementalToSourceDefined(final AirbyteStream stream) { + if (stream.getSupportedSyncModes().contains(SyncMode.INCREMENTAL)) { + stream.setSourceDefinedCursor(true); + } + + return stream; + } + + /* + * To prepare for Destination v2, cdc streams must have a default cursor field Cursor format: the + * airbyte [emittedAt] + [sync wide record counter] + */ + private static AirbyteStream setDefaultCursorFieldForCdc(final AirbyteStream stream) { + if (stream.getSupportedSyncModes().contains(SyncMode.INCREMENTAL)) { + stream.setDefaultCursorField(ImmutableList.of(CDC_DEFAULT_CURSOR)); + } + return stream; + } + + // Note: in place mutation. + private static AirbyteStream addCdcMetadataColumns(final AirbyteStream stream) { + + final ObjectNode jsonSchema = (ObjectNode) stream.getJsonSchema(); + final ObjectNode properties = (ObjectNode) jsonSchema.get("properties"); + + final JsonNode airbyteIntegerType = Jsons.jsonNode(ImmutableMap.of("type", "number", "airbyte_type", "integer")); + final JsonNode stringType = Jsons.jsonNode(ImmutableMap.of("type", "string")); + properties.set(CDC_LSN, stringType); + properties.set(CDC_UPDATED_AT, stringType); + properties.set(CDC_DELETED_AT, stringType); + properties.set(CDC_EVENT_SERIAL_NO, stringType); + properties.set(CDC_DEFAULT_CURSOR, airbyteIntegerType); + + return stream; + } + + private void readSsl(final JsonNode sslMethod, final List additionalParameters) { + final JsonNode config = sslMethod.get("ssl_method"); + switch (config.get("ssl_method").asText()) { + case "unencrypted" -> { + additionalParameters.add("encrypt=false"); + additionalParameters.add("trustServerCertificate=true"); + } + case 
"encrypted_trust_server_certificate" -> { + additionalParameters.add("encrypt=true"); + additionalParameters.add("trustServerCertificate=true"); + } + case "encrypted_verify_certificate" -> { + additionalParameters.add("encrypt=true"); + additionalParameters.add("trustServerCertificate=false"); + + if (config.has("certificate")) { + String certificate = config.get("certificate").asText(); + String password = RandomStringUtils.randomAlphanumeric(100); + final URI keyStoreUri; + try { + keyStoreUri = SSLCertificateUtils.keyStoreFromCertificate(certificate, password, null, null); + } catch (IOException | KeyStoreException | NoSuchAlgorithmException | CertificateException e) { + throw new RuntimeException(e); + } + additionalParameters + .add("trustStore=" + keyStoreUri.getPath()); + additionalParameters + .add("trustStorePassword=" + password); + } + + if (config.has("hostNameInCertificate")) { + additionalParameters + .add("hostNameInCertificate=" + config.get("hostNameInCertificate").asText()); + } + } + } + } + + @Override + public Collection> readStreams(JsonNode config, ConfiguredAirbyteCatalog catalog, JsonNode state) + throws Exception { + final AirbyteStateType supportedType = getSupportedStateType(config); + final StateManager stateManager = StateManagerFactory.createStateManager(supportedType, + StateGeneratorUtils.deserializeInitialState(state, supportedType), catalog); + final Instant emittedAt = Instant.now(); + final JdbcDatabase database = createDatabase(config); + final Map>> fullyQualifiedTableNameToInfo = + discoverWithoutSystemTables(database) + .stream() + .collect(Collectors.toMap(t -> String.format("%s.%s", t.getNameSpace(), t.getName()), + Function + .identity())); + initializeForStateManager(database, catalog, fullyQualifiedTableNameToInfo, stateManager); + logPreSyncDebugData(database, catalog); + return super.readStreams(config, catalog, state); + } + + private boolean cloudDeploymentMode() { + return 
AdaptiveSourceRunner.CLOUD_MODE.equalsIgnoreCase(getFeatureFlags().deploymentMode()); + } + + public Duration getConnectionTimeoutMssql(final Map connectionProperties) { + return getConnectionTimeout(connectionProperties); + } + + @Override + public JdbcDatabase createDatabase(final JsonNode sourceConfig) throws SQLException { + return createDatabase(sourceConfig, JDBC_DELIMITER); + } + + public static void main(final String[] args) throws Exception { + final Source source = MssqlSource.sshWrappedSource(new MssqlSource()); + final MSSqlSourceExceptionHandler exceptionHandler = new MSSqlSourceExceptionHandler(); + LOGGER.info("starting source: {}", MssqlSource.class); + new IntegrationRunner(source).run(args, exceptionHandler); + LOGGER.info("completed source: {}", MssqlSource.class); + } + + @Override + protected void logPreSyncDebugData(final JdbcDatabase database, final ConfiguredAirbyteCatalog catalog) throws SQLException { + super.logPreSyncDebugData(database, catalog); + MssqlQueryUtils.getIndexInfoForStreams(database, catalog, getQuoteString()); + } + + @Override + protected void initializeForStateManager(final JdbcDatabase database, + final ConfiguredAirbyteCatalog catalog, + final Map>> tableNameToTable, + final StateManager stateManager) { + if (initialLoadStateManager != null) { + return; + } + var sourceConfig = database.getSourceConfig(); + if (isCdc(sourceConfig)) { + initialLoadStateManager = getMssqlInitialLoadGlobalStateManager(database, catalog, stateManager, tableNameToTable, getQuoteString()); + } else { + final MssqlCursorBasedStateManager cursorBasedStateManager = new MssqlCursorBasedStateManager(stateManager.getRawStateMessages(), catalog); + final InitialLoadStreams initialLoadStreams = streamsForInitialOrderedColumnLoad(cursorBasedStateManager, catalog); + initialLoadStateManager = new MssqlInitialLoadStreamStateManager(catalog, initialLoadStreams, + initPairToOrderedColumnInfoMap(database, catalog, tableNameToTable, getQuoteString())); + } 
+ } + + @Nullable + @Override + public InitialLoadHandler getInitialLoadHandler(final JdbcDatabase database, + final ConfiguredAirbyteStream airbyteStream, + final ConfiguredAirbyteCatalog catalog, + final StateManager stateManager) { + var sourceConfig = database.getSourceConfig(); + if (isCdc(sourceConfig)) { + return getMssqlFullRefreshInitialLoadHandler(database, catalog, initialLoadStateManager, stateManager, airbyteStream, Instant.now(), + getQuoteString()) + .get(); + } else { + return new MssqlInitialLoadHandler(sourceConfig, database, new MssqlSourceOperations(), getQuoteString(), initialLoadStateManager, + Optional.empty(), + getTableSizeInfoForStreams(database, catalog.getStreams(), getQuoteString())); + } + } + + @Override + public boolean supportResumableFullRefresh(final JdbcDatabase database, final ConfiguredAirbyteStream airbyteStream) { + if (airbyteStream.getStream() != null && airbyteStream.getStream().getSourceDefinedPrimaryKey() != null + && !airbyteStream.getStream().getSourceDefinedPrimaryKey().isEmpty()) { + return true; + } + + return false; + } + + @Override + protected SourceStateMessageProducer getSourceStateProducerForNonResumableFullRefreshStream(final JdbcDatabase database) { + return new NonResumableStateMessageProducer<>(isCdc(database.getSourceConfig()), initialLoadStateManager); + } + + @NotNull + @Override + public AutoCloseableIterator augmentWithStreamStatus(@NotNull final ConfiguredAirbyteStream airbyteStream, + @NotNull final AutoCloseableIterator streamItrator) { + final var pair = + new io.airbyte.protocol.models.AirbyteStreamNameNamespacePair(airbyteStream.getStream().getName(), airbyteStream.getStream().getNamespace()); + final var starterStatus = + new StreamStatusTraceEmitterIterator(new AirbyteStreamStatusHolder(pair, AirbyteStreamStatusTraceMessage.AirbyteStreamStatus.STARTED)); + final var completeStatus = + new StreamStatusTraceEmitterIterator(new AirbyteStreamStatusHolder(pair, 
AirbyteStreamStatusTraceMessage.AirbyteStreamStatus.COMPLETE)); + return AutoCloseableIterators.concatWithEagerClose(starterStatus, streamItrator, completeStatus); + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/MssqlSourceOperations.java b/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/MssqlSourceOperations.java new file mode 100644 index 0000000000000..e356644391a21 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/MssqlSourceOperations.java @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql; + +import static io.airbyte.cdk.db.DataTypeUtils.TIMESTAMPTZ_FORMATTER; +import static io.airbyte.cdk.db.jdbc.JdbcConstants.INTERNAL_COLUMN_NAME; +import static io.airbyte.cdk.db.jdbc.JdbcConstants.INTERNAL_COLUMN_TYPE; +import static io.airbyte.cdk.db.jdbc.JdbcConstants.INTERNAL_COLUMN_TYPE_NAME; +import static io.airbyte.cdk.db.jdbc.JdbcConstants.INTERNAL_SCHEMA_NAME; +import static io.airbyte.cdk.db.jdbc.JdbcConstants.INTERNAL_TABLE_NAME; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.microsoft.sqlserver.jdbc.Geography; +import com.microsoft.sqlserver.jdbc.Geometry; +import com.microsoft.sqlserver.jdbc.SQLServerResultSetMetaData; +import io.airbyte.cdk.db.jdbc.AirbyteRecordData; +import io.airbyte.cdk.db.jdbc.JdbcSourceOperations; +import io.airbyte.integrations.source.mssql.initialsync.CdcMetadataInjector; +import io.airbyte.protocol.models.JsonSchemaType; +import java.sql.JDBCType; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Timestamp; +import java.time.LocalDateTime; +import java.time.OffsetDateTime; +import java.time.format.DateTimeFormatter; +import 
java.time.format.DateTimeParseException; +import java.util.Base64; +import java.util.Optional; +import microsoft.sql.DateTimeOffset; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class MssqlSourceOperations extends JdbcSourceOperations { + + private static final Logger LOGGER = LoggerFactory.getLogger(MssqlSourceOperations.class); + + private final Optional metadataInjector; + + public MssqlSourceOperations() { + super(); + this.metadataInjector = Optional.empty(); + } + + public MssqlSourceOperations(final Optional metadataInjector) { + super(); + this.metadataInjector = metadataInjector; + } + + @Override + public AirbyteRecordData convertDatabaseRowToAirbyteRecordData(final ResultSet queryContext) throws SQLException { + final AirbyteRecordData recordData = super.convertDatabaseRowToAirbyteRecordData(queryContext); + final ObjectNode jsonNode = (ObjectNode) recordData.rawRowData(); + if (!metadataInjector.isPresent()) { + return recordData; + } + metadataInjector.get().inject(jsonNode); + return new AirbyteRecordData(jsonNode, recordData.meta()); + } + + /** + * This method is used to set a JSON value by type. Needs to be overridden as MSSQL has some of its own + * specific types (e.g. Geometry, Geography, Hierarchyid, etc.) + * + * @throws SQLException + */ + @Override + public void copyToJsonField(final ResultSet resultSet, final int colIndex, final ObjectNode json) + throws SQLException { + final SQLServerResultSetMetaData metadata = (SQLServerResultSetMetaData) resultSet + .getMetaData(); + final String columnName = metadata.getColumnName(colIndex); + final String columnTypeName = metadata.getColumnTypeName(colIndex); + + // Attempt to access the column. This allows us to know if it is null before we do + // type-specific parsing. If the column is null, we will populate the null value and skip attempting + // to + // parse the column value. 
+ resultSet.getObject(colIndex); + if (resultSet.wasNull()) { + json.putNull(columnName); + } else if (columnTypeName.equalsIgnoreCase("time")) { + putTime(json, columnName, resultSet, colIndex); + } else if (columnTypeName.equalsIgnoreCase("geometry")) { + putGeometry(json, columnName, resultSet, colIndex); + } else if (columnTypeName.equalsIgnoreCase("geography")) { + putGeography(json, columnName, resultSet, colIndex); + } else if (columnTypeName.equalsIgnoreCase("datetimeoffset")) { + // JDBC will recognize such columns as VARCHAR. Thus we have to have special handling on it. + putTimestampWithTimezone(json, columnName, resultSet, colIndex); + } else { + super.copyToJsonField(resultSet, colIndex, json); + } + } + + @Override + public JDBCType getDatabaseFieldType(final JsonNode field) { + try { + final String typeName = field.get(INTERNAL_COLUMN_TYPE_NAME).asText(); + if (typeName.equalsIgnoreCase("geography") + || typeName.equalsIgnoreCase("geometry") + || typeName.equalsIgnoreCase("hierarchyid")) { + return JDBCType.VARCHAR; + } + + if (typeName.equalsIgnoreCase("datetime")) { + return JDBCType.TIMESTAMP; + } + + if (typeName.equalsIgnoreCase("datetimeoffset")) { + return JDBCType.TIMESTAMP_WITH_TIMEZONE; + } + + if (typeName.equalsIgnoreCase("real")) { + return JDBCType.REAL; + } + + return JDBCType.valueOf(field.get(INTERNAL_COLUMN_TYPE).asInt()); + } catch (final IllegalArgumentException ex) { + LOGGER.warn(String.format("Could not convert column: %s from table: %s.%s with type: %s. 
Casting to VARCHAR.", + field.get(INTERNAL_COLUMN_NAME), + field.get(INTERNAL_SCHEMA_NAME), + field.get(INTERNAL_TABLE_NAME), + field.get(INTERNAL_COLUMN_TYPE))); + return JDBCType.VARCHAR; + } + } + + @Override + protected void putBinary(final ObjectNode node, + final String columnName, + final ResultSet resultSet, + final int index) + throws SQLException { + final byte[] bytes = resultSet.getBytes(index); + final String value = Base64.getEncoder().encodeToString(bytes); + node.put(columnName, value); + } + + protected void putGeometry(final ObjectNode node, + final String columnName, + final ResultSet resultSet, + final int index) + throws SQLException { + node.put(columnName, Geometry.deserialize(resultSet.getBytes(index)).toString()); + } + + protected void putGeography(final ObjectNode node, + final String columnName, + final ResultSet resultSet, + final int index) + throws SQLException { + node.put(columnName, Geography.deserialize(resultSet.getBytes(index)).toString()); + } + + @Override + protected void putTimestamp(final ObjectNode node, final String columnName, final ResultSet resultSet, final int index) throws SQLException { + final DateTimeFormatter microsecondsFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss[.][SSSSSS]"); + node.put(columnName, getObject(resultSet, index, LocalDateTime.class).format(microsecondsFormatter)); + } + + @Override + public JsonSchemaType getAirbyteType(final JDBCType jdbcType) { + return switch (jdbcType) { + case TINYINT, SMALLINT, INTEGER, BIGINT -> JsonSchemaType.INTEGER; + case DOUBLE, DECIMAL, FLOAT, NUMERIC, REAL -> JsonSchemaType.NUMBER; + case BOOLEAN, BIT -> JsonSchemaType.BOOLEAN; + case NULL -> JsonSchemaType.NULL; + case BLOB, BINARY, VARBINARY, LONGVARBINARY -> JsonSchemaType.STRING_BASE_64; + case TIME -> JsonSchemaType.STRING_TIME_WITHOUT_TIMEZONE; + case TIMESTAMP_WITH_TIMEZONE -> JsonSchemaType.STRING_TIMESTAMP_WITH_TIMEZONE; + case TIMESTAMP -> JsonSchemaType.STRING_TIMESTAMP_WITHOUT_TIMEZONE; 
+ case DATE -> JsonSchemaType.STRING_DATE; + default -> JsonSchemaType.STRING; + }; + } + + @Override + protected void setTimestampWithTimezone(final PreparedStatement preparedStatement, final int parameterIndex, final String value) + throws SQLException { + try { + final OffsetDateTime offsetDateTime = OffsetDateTime.parse(value, TIMESTAMPTZ_FORMATTER); + final Timestamp timestamp = Timestamp.valueOf(offsetDateTime.atZoneSameInstant(offsetDateTime.getOffset()).toLocalDateTime()); + // Final step of conversion from + // OffsetDateTime (a Java construct) object -> Timestamp (a Java construct) -> + // DateTimeOffset (a Microsoft.sql specific construct) + // and provide the offset in minutes to the converter + final DateTimeOffset datetimeoffset = DateTimeOffset.valueOf(timestamp, offsetDateTime.getOffset().getTotalSeconds() / 60); + preparedStatement.setObject(parameterIndex, datetimeoffset); + } catch (final DateTimeParseException e) { + throw new RuntimeException(e); + } + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/cdc/MssqlCdcStateConstants.java b/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/cdc/MssqlCdcStateConstants.java new file mode 100644 index 0000000000000..5236d102a567f --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/cdc/MssqlCdcStateConstants.java @@ -0,0 +1,13 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mssql.cdc; + +public class MssqlCdcStateConstants { + + public static final String MSSQL_CDC_OFFSET = "mssql_cdc_offset"; + public static final String MSSQL_DB_HISTORY = "mssql_db_history"; + public static final String IS_COMPRESSED = "is_compressed"; + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/cdc/MssqlDebeziumStateUtil.java b/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/cdc/MssqlDebeziumStateUtil.java new file mode 100644 index 0000000000000..3cd7414586efa --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/cdc/MssqlDebeziumStateUtil.java @@ -0,0 +1,305 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql.cdc; + +import static io.debezium.relational.RelationalDatabaseConnectorConfig.DATABASE_NAME; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.annotations.VisibleForTesting; +import io.airbyte.cdk.db.jdbc.JdbcDatabase; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.integrations.debezium.internals.AirbyteFileOffsetBackingStore; +import io.airbyte.cdk.integrations.debezium.internals.AirbyteSchemaHistoryStorage; +import io.airbyte.cdk.integrations.debezium.internals.AirbyteSchemaHistoryStorage.SchemaHistory; +import io.airbyte.cdk.integrations.debezium.internals.DebeziumPropertiesManager; +import io.airbyte.cdk.integrations.debezium.internals.DebeziumRecordPublisher; +import io.airbyte.cdk.integrations.debezium.internals.DebeziumStateUtil; +import io.airbyte.cdk.integrations.debezium.internals.RecordWaitTimeUtil; +import io.airbyte.cdk.integrations.debezium.internals.RelationalDbDebeziumPropertiesManager; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import 
io.debezium.config.Configuration; +import io.debezium.connector.common.OffsetReader; +import io.debezium.connector.sqlserver.Lsn; +import io.debezium.connector.sqlserver.SqlServerConnectorConfig; +import io.debezium.connector.sqlserver.SqlServerOffsetContext; +import io.debezium.connector.sqlserver.SqlServerOffsetContext.Loader; +import io.debezium.connector.sqlserver.SqlServerPartition; +import io.debezium.engine.ChangeEvent; +import io.debezium.pipeline.spi.Offsets; +import io.debezium.pipeline.spi.Partition; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.time.Duration; +import java.time.Instant; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Properties; +import java.util.Set; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; +import java.util.stream.Stream; +import org.apache.kafka.connect.storage.FileOffsetBackingStore; +import org.apache.kafka.connect.storage.OffsetStorageReaderImpl; +import org.codehaus.plexus.util.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class MssqlDebeziumStateUtil implements DebeziumStateUtil { + + // Testing is done concurrently so initialState is cached in a thread local variable + // in order to provide each test thread with its own correct initial state + private static ThreadLocal initialState = new ThreadLocal<>(); + + final static String LSN_OFFSET_INCLUDED_QUERY = """ + DECLARE @saved_lsn BINARY(10), @min_lsn BINARY(10), @max_lsn BINARY(10), @res BIT + -- Set @saved_lsn = 0x0000DF7C000006A80006 + Set @saved_lsn = ? 
+ SELECT @min_lsn = MIN(start_lsn) FROM cdc.change_tables + SELECT @max_lsn = sys.fn_cdc_get_max_lsn() + IF (@saved_lsn >= @min_lsn) + Set @res = 1 + ELSE + Set @res = 0 + select @res as [included], @min_lsn as [min], @max_lsn as [max] + """; + private static final Logger LOGGER = LoggerFactory.getLogger(MssqlDebeziumStateUtil.class); + + /** + * Generate initial state for debezium state. + */ + public static synchronized JsonNode constructInitialDebeziumState(final Properties properties, + final ConfiguredAirbyteCatalog catalog, + final JdbcDatabase database) { + // There is no need to construct an initial state after it was already constructed in this run + // Starting and stopping mssql debezium too many times causes it to hang during shutdown + if (initialState.get() == null) { + properties.setProperty("heartbeat.interval.ms", "0"); + final JsonNode highWaterMark = constructLsnSnapshotState(database, database.getSourceConfig().get(JdbcUtils.DATABASE_KEY).asText()); + final AirbyteFileOffsetBackingStore emptyOffsetManager = AirbyteFileOffsetBackingStore.initializeState(null, + Optional.empty()); + final AirbyteSchemaHistoryStorage schemaHistoryStorage = + AirbyteSchemaHistoryStorage.initializeDBHistory(new SchemaHistory<>(Optional.empty(), false), false); + final LinkedBlockingQueue> queue = new LinkedBlockingQueue<>(); + final Instant engineStartTime = Instant.now(); + boolean schemaHistoryRead = false; + SchemaHistory schemaHistory = null; + final var debeziumPropertiesManager = + new RelationalDbDebeziumPropertiesManager(properties, database.getSourceConfig(), catalog, Collections.emptyList()); + try { + final DebeziumRecordPublisher publisher = new DebeziumRecordPublisher(debeziumPropertiesManager); + publisher.start(queue, emptyOffsetManager, Optional.of(schemaHistoryStorage)); + while (!publisher.hasClosed()) { + final ChangeEvent event = queue.poll(10, TimeUnit.SECONDS); + + // If no event such as an empty table, generating schema history may take a few 
cycles + // depending on the size of history. + schemaHistory = schemaHistoryStorage.read(); + schemaHistoryRead = Objects.nonNull(schemaHistory) && StringUtils.isNotBlank(schemaHistory.getSchema()); + + if (event != null || schemaHistoryRead) { + publisher.close(); + break; + } + + Duration initialWaitingDuration = Duration.ofMinutes(5L); + // If initial waiting seconds is configured and it's greater than 5 minutes, use that value instead + // of the default value + final Duration configuredDuration = RecordWaitTimeUtil.getFirstRecordWaitTime(database.getSourceConfig()); + if (configuredDuration.compareTo(initialWaitingDuration) > 0) { + initialWaitingDuration = configuredDuration; + } + if (Duration.between(engineStartTime, Instant.now()).compareTo(initialWaitingDuration) > 0) { + LOGGER.error("Schema history not constructed after {} seconds of waiting, closing the engine", initialWaitingDuration.getSeconds()); + publisher.close(); + throw new RuntimeException( + "Building schema history has timed out. Please consider increasing the debezium wait time in advanced options."); + } + } + } catch (final InterruptedException ine) { + LOGGER.debug("Interrupted during closing of publisher"); + } catch (final Exception e) { + throw new RuntimeException(e); + } + + final AirbyteFileOffsetBackingStore offsetManager = AirbyteFileOffsetBackingStore.initializeState(highWaterMark, + Optional.empty()); + + final Map offset = offsetManager.read(); + if (!schemaHistoryRead) { + schemaHistory = schemaHistoryStorage.read(); + } + + assert !offset.isEmpty(); + assert Objects.nonNull(schemaHistory); + assert Objects.nonNull(schemaHistory.getSchema()); + + final JsonNode asJson = serialize(offset, schemaHistory); + LOGGER.info("Initial Debezium state constructed. 
offset={}", Jsons.jsonNode(offset)); + + if (asJson.get(MssqlCdcStateConstants.MSSQL_DB_HISTORY).asText().isBlank()) { + throw new RuntimeException("Schema history snapshot returned empty history."); + } + initialState.set(asJson); + } + return initialState.get(); + + } + + public static void disposeInitialState() { + LOGGER.debug("Dispose initial state cached for {}", Thread.currentThread()); + initialState.remove(); + } + + private static JsonNode serialize(final Map offset, final SchemaHistory dbHistory) { + final Map state = new HashMap<>(); + state.put(MssqlCdcStateConstants.MSSQL_CDC_OFFSET, offset); + state.put(MssqlCdcStateConstants.MSSQL_DB_HISTORY, dbHistory.getSchema()); + state.put(MssqlCdcStateConstants.IS_COMPRESSED, dbHistory.isCompressed()); + + return Jsons.jsonNode(state); + } + + public static MssqlDebeziumStateAttributes getStateAttributesFromDB(final JdbcDatabase database) { + try (final Stream stream = database.unsafeResultSetQuery( + connection -> connection.createStatement().executeQuery("select sys.fn_cdc_get_max_lsn()"), + resultSet -> { + final byte[] lsnBinary = resultSet.getBytes(1); + Lsn lsn = Lsn.valueOf(lsnBinary); + return new MssqlDebeziumStateAttributes(lsn); + })) { + final List stateAttributes = stream.toList(); + assert stateAttributes.size() == 1; + return stateAttributes.get(0); + } catch (final SQLException e) { + throw new RuntimeException(e); + } + } + + public record MssqlDebeziumStateAttributes(Lsn lsn) {} + + /** + * Method to construct the initial Debezium state which can be passed onto the Debezium engine to make it + * process the SQL Server transaction log from a specific LSN and skip the snapshot phase. Example: + * ["test",{"server":"test","database":"test"}]" : + * "{"transaction_id":null,"event_serial_no":1,"commit_lsn":"00000644:00002ff8:0099","change_lsn":"0000062d:00017ff0:016d"}" + */ + static JsonNode constructLsnSnapshotState(final JdbcDatabase database, final String dbName) { + return 
format(getStateAttributesFromDB(database), dbName); + } + + @VisibleForTesting + public static JsonNode format(final MssqlDebeziumStateAttributes attributes, final String dbName) { + final String key = "[\"" + dbName + "\",{\"server\":\"" + dbName + "\",\"database\":\"" + dbName + "\"}]"; + final String value = + "{\"commit_lsn\":\"" + attributes.lsn.toString() + "\",\"snapshot\":true,\"snapshot_completed\":true" + + "}"; + + final Map result = new HashMap<>(); + result.put(key, value); + + final JsonNode jsonNode = Jsons.jsonNode(result); + LOGGER.info("Initial Debezium state offset constructed: {}", jsonNode); + + return jsonNode; + } + + public Optional savedOffset(final Properties baseProperties, + final ConfiguredAirbyteCatalog catalog, + final JsonNode cdcOffset, + final JsonNode config) { + if (Objects.isNull(cdcOffset)) { + return Optional.empty(); + } + + final var offsetManager = AirbyteFileOffsetBackingStore.initializeState(cdcOffset, Optional.empty()); + final DebeziumPropertiesManager debeziumPropertiesManager = + new RelationalDbDebeziumPropertiesManager(baseProperties, config, catalog, Collections.emptyList()); + final Properties debeziumProperties = debeziumPropertiesManager.getDebeziumProperties(offsetManager); + return parseSavedOffset(debeziumProperties); + } + + private Optional parseSavedOffset(final Properties properties) { + FileOffsetBackingStore fileOffsetBackingStore = null; + OffsetStorageReaderImpl offsetStorageReader = null; + + try { + fileOffsetBackingStore = getFileOffsetBackingStore(properties); + offsetStorageReader = getOffsetStorageReader(fileOffsetBackingStore, properties); + + final SqlServerConnectorConfig connectorConfig = new SqlServerConnectorConfig(Configuration.from(properties)); + final SqlServerOffsetContext.Loader loader = new Loader(connectorConfig); + final Set partitions = + Collections.singleton(new SqlServerPartition(connectorConfig.getLogicalName(), properties.getProperty(DATABASE_NAME.name()))); + final 
OffsetReader offsetReader = new OffsetReader<>(offsetStorageReader, loader); + final Map offsets = offsetReader.offsets(partitions); + return extractStateAttributes(partitions, offsets); + } finally { + LOGGER.info("Closing offsetStorageReader and fileOffsetBackingStore"); + if (offsetStorageReader != null) { + offsetStorageReader.close(); + } + + if (fileOffsetBackingStore != null) { + fileOffsetBackingStore.stop(); + } + + } + } + + private Optional extractStateAttributes(final Set partitions, + final Map offsets) { + boolean found = false; + for (final Partition partition : partitions) { + final SqlServerOffsetContext mssqlOffsetContext = offsets.get(partition); + + if (mssqlOffsetContext != null) { + found = true; + LOGGER.info("Found previous partition offset {}: {}", partition, mssqlOffsetContext.getOffset()); + } + } + + if (!found) { + LOGGER.info("No previous offsets found"); + return Optional.empty(); + } + + final Offsets of = Offsets.of(offsets); + final SqlServerOffsetContext previousOffset = of.getTheOnlyOffset(); + return Optional.of(new MssqlDebeziumStateAttributes(previousOffset.getChangePosition().getCommitLsn())); + } + + public boolean savedOffsetStillPresentOnServer(final JdbcDatabase database, final MssqlDebeziumStateAttributes savedState) { + final Lsn savedLsn = savedState.lsn(); + try (final Stream stream = database.unsafeResultSetQuery( + connection -> { + PreparedStatement stmt = connection.prepareStatement(LSN_OFFSET_INCLUDED_QUERY); + stmt.setBytes(1, savedLsn.getBinary()); + return stmt.executeQuery(); + }, + resultSet -> { + final byte[] minLsnBinary = resultSet.getBytes(2); + Lsn min_lsn = Lsn.valueOf(minLsnBinary); + final byte[] maxLsnBinary = resultSet.getBytes(3); + Lsn max_lsn = Lsn.valueOf(maxLsnBinary); + final Boolean included = resultSet.getBoolean(1); + LOGGER.info("{} lsn exists on server: [{}]. 
(min server lsn: {} max server lsn: {})", savedLsn.toString(), included, min_lsn.toString(), + max_lsn.toString()); + return included; + })) { + final List reses = stream.toList(); + assert reses.size() == 1; + + return reses.get(0); + } catch (final SQLException e) { + throw new RuntimeException(e); + } + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/cursor_based/MssqlCursorBasedStateManager.java b/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/cursor_based/MssqlCursorBasedStateManager.java new file mode 100644 index 0000000000000..843947efe5ab8 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/cursor_based/MssqlCursorBasedStateManager.java @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql.cursor_based; + +import static io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadStateManager.MSSQL_STATE_VERSION; + +import com.google.common.collect.Lists; +import io.airbyte.cdk.integrations.source.relationaldb.CursorInfo; +import io.airbyte.cdk.integrations.source.relationaldb.models.CursorBasedStatus; +import io.airbyte.cdk.integrations.source.relationaldb.models.InternalModels.StateType; +import io.airbyte.cdk.integrations.source.relationaldb.state.StreamStateManager; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.v0.AirbyteStateMessage; +import io.airbyte.protocol.models.v0.AirbyteStateMessage.AirbyteStateType; +import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair; +import io.airbyte.protocol.models.v0.AirbyteStreamState; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.v0.StreamDescriptor; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import 
java.util.Optional; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class MssqlCursorBasedStateManager extends StreamStateManager { + + private static final Logger LOGGER = LoggerFactory.getLogger(MssqlCursorBasedStateManager.class); + + public MssqlCursorBasedStateManager(final List airbyteStateMessages, final ConfiguredAirbyteCatalog catalog) { + super(airbyteStateMessages, catalog); + } + + @Override + public AirbyteStateMessage toState(final Optional pair) { + if (pair.isPresent()) { + final Map pairToCursorInfoMap = getPairToCursorInfoMap(); + final Optional cursorInfo = Optional.ofNullable(pairToCursorInfoMap.get(pair.get())); + + if (cursorInfo.isPresent()) { + LOGGER.debug("Generating state message for {}...", pair); + return new AirbyteStateMessage() + .withType(AirbyteStateType.STREAM) + // Temporarily include legacy state for backwards compatibility with the platform + .withStream(generateStreamState(pair.get(), cursorInfo.get())); + } else { + LOGGER.warn("Cursor information could not be located in state for stream {}. Returning a new, empty state message...", pair); + return new AirbyteStateMessage().withType(AirbyteStateType.STREAM).withStream(new AirbyteStreamState()); + } + } else { + LOGGER.warn("Stream not provided. Returning a new, empty state message..."); + return new AirbyteStateMessage().withType(AirbyteStateType.STREAM).withStream(new AirbyteStreamState()); + } + } + + /** + * Generates the stream state for the given stream and cursor information. + * + * @param airbyteStreamNameNamespacePair The stream. + * @param cursorInfo The current cursor. + * @return The {@link AirbyteStreamState} representing the current state of the stream. 
+ */ + private AirbyteStreamState generateStreamState(final AirbyteStreamNameNamespacePair airbyteStreamNameNamespacePair, + final CursorInfo cursorInfo) { + return new AirbyteStreamState() + .withStreamDescriptor( + new StreamDescriptor().withName(airbyteStreamNameNamespacePair.getName()).withNamespace(airbyteStreamNameNamespacePair.getNamespace())) + .withStreamState(Jsons.jsonNode(generateDbStreamState(airbyteStreamNameNamespacePair, cursorInfo))); + } + + private CursorBasedStatus generateDbStreamState(final AirbyteStreamNameNamespacePair airbyteStreamNameNamespacePair, + final CursorInfo cursorInfo) { + final CursorBasedStatus state = new CursorBasedStatus(); + state.setStateType(StateType.CURSOR_BASED); + state.setVersion(MSSQL_STATE_VERSION); + state.setStreamName(airbyteStreamNameNamespacePair.getName()); + state.setStreamNamespace(airbyteStreamNameNamespacePair.getNamespace()); + state.setCursorField(cursorInfo.getCursorField() == null ? Collections.emptyList() : Lists.newArrayList(cursorInfo.getCursorField())); + state.setCursor(cursorInfo.getCursor()); + if (cursorInfo.getCursorRecordCount() > 0L) { + state.setCursorRecordCount(cursorInfo.getCursorRecordCount()); + } + return state; + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/initialsync/CdcMetadataInjector.java b/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/initialsync/CdcMetadataInjector.java new file mode 100644 index 0000000000000..419da08fb0ee2 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/initialsync/CdcMetadataInjector.java @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mssql.initialsync; + +import com.fasterxml.jackson.databind.node.ObjectNode; +import io.airbyte.integrations.source.mssql.MssqlCdcConnectorMetadataInjector; +import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil.MssqlDebeziumStateAttributes; + +public class CdcMetadataInjector { + + private final String transactionTimestamp; + private final MssqlDebeziumStateAttributes stateAttributes; + private final MssqlCdcConnectorMetadataInjector metadataInjector; + + public CdcMetadataInjector(final String transactionTimestamp, + final MssqlDebeziumStateAttributes stateAttributes, + final MssqlCdcConnectorMetadataInjector metadataInjector) { + this.transactionTimestamp = transactionTimestamp; + this.stateAttributes = stateAttributes; + this.metadataInjector = metadataInjector; + } + + public void inject(final ObjectNode record) { + metadataInjector.addMetaDataToRowsFetchedOutsideDebezium(record, transactionTimestamp, stateAttributes); + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadGlobalStateManager.java b/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadGlobalStateManager.java new file mode 100644 index 0000000000000..4fa63c266c6e1 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadGlobalStateManager.java @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mssql.initialsync; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.base.Preconditions; +import io.airbyte.cdk.integrations.source.relationaldb.models.CdcState; +import io.airbyte.cdk.integrations.source.relationaldb.models.DbStreamState; +import io.airbyte.cdk.integrations.source.relationaldb.state.StateManager; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil.InitialLoadStreams; +import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil.OrderedColumnInfo; +import io.airbyte.protocol.models.v0.*; +import io.airbyte.protocol.models.v0.AirbyteStateMessage.AirbyteStateType; +import java.util.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class MssqlInitialLoadGlobalStateManager extends MssqlInitialLoadStateManager { + + private static final Logger LOGGER = LoggerFactory.getLogger(MssqlInitialLoadGlobalStateManager.class); + private StateManager stateManager; + private final CdcState initialCdcState; + // Only one global state is emitted, which is fanned out into many entries in the DB by platform. As + // a result, we need to keep track of streams that have completed the snapshot. + private Set streamsThatHaveCompletedSnapshot; + + // No special handling for resumable full refresh streams. We will report the cursor as it is. 
+ private Set resumableFullRefreshStreams; + private Set nonResumableFullRefreshStreams; + private Set completedNonResumableFullRefreshStreams; + + public MssqlInitialLoadGlobalStateManager(final InitialLoadStreams initialLoadStreams, + final Map pairToOrderedColInfo, + final StateManager stateManager, + final ConfiguredAirbyteCatalog catalog, + final CdcState initialCdcState) { + this.pairToOrderedColLoadStatus = MssqlInitialLoadStateManager.initPairToOrderedColumnLoadStatusMap(initialLoadStreams.pairToInitialLoadStatus()); + this.pairToOrderedColInfo = pairToOrderedColInfo; + this.stateManager = stateManager; + this.initialCdcState = initialCdcState; + this.streamStateForIncrementalRunSupplier = pair -> Jsons.emptyObject(); + initStreams(initialLoadStreams, catalog); + } + + private AirbyteGlobalState generateGlobalState(final List streamStates) { + CdcState cdcState = stateManager.getCdcStateManager().getCdcState(); + if (cdcState == null || cdcState.getState() == null) { + cdcState = initialCdcState; + } + + final AirbyteGlobalState globalState = new AirbyteGlobalState(); + globalState.setSharedState(Jsons.jsonNode(cdcState)); + globalState.setStreamStates(streamStates); + return globalState; + } + + private void initStreams(final InitialLoadStreams initialLoadStreams, + final ConfiguredAirbyteCatalog catalog) { + this.streamsThatHaveCompletedSnapshot = new HashSet<>(); + this.resumableFullRefreshStreams = new HashSet<>(); + this.nonResumableFullRefreshStreams = new HashSet<>(); + this.completedNonResumableFullRefreshStreams = new HashSet<>(); + + catalog.getStreams().forEach(configuredAirbyteStream -> { + var pairInStream = + new AirbyteStreamNameNamespacePair(configuredAirbyteStream.getStream().getName(), configuredAirbyteStream.getStream().getNamespace()); + if (!initialLoadStreams.streamsForInitialLoad().contains(configuredAirbyteStream) + && configuredAirbyteStream.getSyncMode() == SyncMode.INCREMENTAL) { + 
this.streamsThatHaveCompletedSnapshot.add(pairInStream); + } + if (configuredAirbyteStream.getSyncMode() == SyncMode.FULL_REFRESH) { + if (configuredAirbyteStream.getStream().getSourceDefinedPrimaryKey() != null + && !configuredAirbyteStream.getStream().getSourceDefinedPrimaryKey().isEmpty()) { + this.resumableFullRefreshStreams.add(pairInStream); + } else { + this.nonResumableFullRefreshStreams.add(pairInStream); + } + } + }); + } + + @Override + public AirbyteStateMessage generateStateMessageAtCheckpoint(final ConfiguredAirbyteStream airbyteStream) { + final List streamStates = new ArrayList<>(); + streamsThatHaveCompletedSnapshot.forEach(stream -> { + final DbStreamState state = getFinalState(stream); + streamStates.add(getAirbyteStreamState(stream, Jsons.jsonNode(state))); + }); + + resumableFullRefreshStreams.forEach(stream -> { + var ocStatus = getOrderedColumnLoadStatus(stream); + if (ocStatus != null) { + streamStates.add(getAirbyteStreamState(stream, Jsons.jsonNode(ocStatus))); + } + }); + + completedNonResumableFullRefreshStreams.forEach(stream -> { + streamStates.add(new AirbyteStreamState() + .withStreamDescriptor( + new StreamDescriptor().withName(stream.getName()).withNamespace(stream.getNamespace()))); + }); + + if (airbyteStream.getSyncMode() == SyncMode.INCREMENTAL) { + AirbyteStreamNameNamespacePair pair = + new AirbyteStreamNameNamespacePair(airbyteStream.getStream().getName(), airbyteStream.getStream().getNamespace()); + var ocStatus = getOrderedColumnLoadStatus(pair); + streamStates.add(getAirbyteStreamState(pair, Jsons.jsonNode(ocStatus))); + } + + return new AirbyteStateMessage() + .withType(AirbyteStateType.GLOBAL) + .withGlobal(generateGlobalState(streamStates)); + } + + private AirbyteStreamState getAirbyteStreamState(final AirbyteStreamNameNamespacePair pair, final JsonNode stateData) { + Preconditions.checkNotNull(pair); + Preconditions.checkNotNull(pair.getName()); + Preconditions.checkNotNull(pair.getNamespace()); + + return new 
AirbyteStreamState() + .withStreamDescriptor( + new StreamDescriptor().withName(pair.getName()).withNamespace(pair.getNamespace())) + .withStreamState(stateData); + } + + @Override + public AirbyteStateMessage createFinalStateMessage(final ConfiguredAirbyteStream airbyteStream) { + + final io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair pair = new io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair( + airbyteStream.getStream().getName(), airbyteStream.getStream().getNamespace()); + if (airbyteStream.getSyncMode() == SyncMode.INCREMENTAL) { + streamsThatHaveCompletedSnapshot.add(pair); + } else if (nonResumableFullRefreshStreams.contains(pair)) { + completedNonResumableFullRefreshStreams.add(pair); + } + final List streamStates = new ArrayList<>(); + streamsThatHaveCompletedSnapshot.forEach(stream -> { + final DbStreamState state = getFinalState(stream); + streamStates.add(getAirbyteStreamState(stream, Jsons.jsonNode(state))); + }); + + resumableFullRefreshStreams.forEach(stream -> { + var ocStatus = getOrderedColumnLoadStatus(stream); + streamStates.add(getAirbyteStreamState(stream, Jsons.jsonNode(ocStatus))); + }); + + completedNonResumableFullRefreshStreams.forEach(stream -> { + streamStates.add(new AirbyteStreamState() + .withStreamDescriptor( + new StreamDescriptor().withName(stream.getName()).withNamespace(stream.getNamespace()))); + }); + + return new AirbyteStateMessage() + .withType(AirbyteStateType.GLOBAL) + .withGlobal(generateGlobalState(streamStates)); + } + + private DbStreamState getFinalState(final AirbyteStreamNameNamespacePair pair) { + Preconditions.checkNotNull(pair); + Preconditions.checkNotNull(pair.getName()); + Preconditions.checkNotNull(pair.getNamespace()); + + return new DbStreamState() + .withStreamName(pair.getName()) + .withStreamNamespace(pair.getNamespace()) + .withCursorField(Collections.emptyList()) + .withCursor(null); + } + +} diff --git 
/*
 * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql.initialsync;

import static io.airbyte.cdk.db.jdbc.JdbcConstants.JDBC_COLUMN_COLUMN_NAME;
import static io.airbyte.cdk.db.jdbc.JdbcConstants.JDBC_COLUMN_DATABASE_NAME;
import static io.airbyte.cdk.db.jdbc.JdbcConstants.JDBC_COLUMN_SCHEMA_NAME;
import static io.airbyte.cdk.db.jdbc.JdbcConstants.JDBC_COLUMN_TABLE_NAME;
import static io.airbyte.cdk.db.jdbc.JdbcConstants.JDBC_COLUMN_TYPE;
import static io.airbyte.cdk.db.jdbc.JdbcUtils.getFullyQualifiedTableName;
import static io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants.SYNC_CHECKPOINT_DURATION_PROPERTY;
import static io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants.SYNC_CHECKPOINT_RECORDS_PROPERTY;

import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.annotations.VisibleForTesting;
import io.airbyte.cdk.db.SqlDatabase;
import io.airbyte.cdk.db.jdbc.AirbyteRecordData;
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
import io.airbyte.cdk.db.jdbc.JdbcUtils;
import io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants;
import io.airbyte.cdk.integrations.source.relationaldb.DbSourceDiscoverUtil;
import io.airbyte.cdk.integrations.source.relationaldb.InitialLoadHandler;
import io.airbyte.cdk.integrations.source.relationaldb.TableInfo;
import io.airbyte.cdk.integrations.source.relationaldb.state.SourceStateIterator;
import io.airbyte.cdk.integrations.source.relationaldb.state.StateEmitFrequency;
import io.airbyte.cdk.integrations.source.relationaldb.streamstatus.StreamStatusTraceEmitterIterator;
import io.airbyte.commons.stream.AirbyteStreamStatusHolder;
import io.airbyte.commons.stream.AirbyteStreamUtils;
import io.airbyte.commons.util.AutoCloseableIterator;
import io.airbyte.commons.util.AutoCloseableIterators;
import io.airbyte.integrations.source.mssql.MssqlQueryUtils.TableSizeInfo;
import io.airbyte.integrations.source.mssql.MssqlSourceOperations;
import io.airbyte.protocol.models.CommonField;
import io.airbyte.protocol.models.v0.*;
import io.airbyte.protocol.models.v0.AirbyteMessage.Type;
import java.sql.DatabaseMetaData;
import java.sql.JDBCType;
import java.sql.SQLException;
import java.time.Duration;
import java.time.Instant;
import java.util.*;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Function;
import org.apache.commons.lang3.StringUtils;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Builds the record iterators for the initial (snapshot) load of MSSQL streams. For each stream it
 * wires together the chunked ordered-column query iterator, record-to-message conversion,
 * state-checkpointing, progress logging, and optional stream-status trace messages.
 */
public class MssqlInitialLoadHandler implements InitialLoadHandler<JDBCType> {

  private static final Logger LOGGER = LoggerFactory.getLogger(MssqlInitialLoadHandler.class);
  // Emit a progress log line once per this many records.
  private static final long RECORD_LOGGING_SAMPLE_RATE = 1_000_000;
  private final JsonNode config;
  private final JdbcDatabase database;
  private final MssqlSourceOperations sourceOperations;
  private final String quoteString;
  private final MssqlInitialLoadStateManager initialLoadStateManager;
  private final Optional<Function<AirbyteStreamNameNamespacePair, JsonNode>> streamStateForIncrementalRunSupplier;
  // Target data volume per chunk query, in bytes (1 GiB despite the "GB" in the name).
  private static final long QUERY_TARGET_SIZE_GB = 1_073_741_824;
  // Chunk size used when the table's size/row-length stats are unavailable.
  private static final long DEFAULT_CHUNK_SIZE = 1_000_000;
  final Map<AirbyteStreamNameNamespacePair, TableSizeInfo> tableSizeInfoMap;

  public MssqlInitialLoadHandler(
                                 final JsonNode config,
                                 final JdbcDatabase database,
                                 final MssqlSourceOperations sourceOperations,
                                 final String quoteString,
                                 final MssqlInitialLoadStateManager initialLoadStateManager,
                                 final Optional<Function<AirbyteStreamNameNamespacePair, JsonNode>> streamStateForIncrementalRunSupplier,
                                 final Map<AirbyteStreamNameNamespacePair, TableSizeInfo> tableSizeInfoMap) {
    this.config = config;
    this.database = database;
    this.sourceOperations = sourceOperations;
    this.quoteString = quoteString;
    this.initialLoadStateManager = initialLoadStateManager;
    this.streamStateForIncrementalRunSupplier = streamStateForIncrementalRunSupplier;
    this.tableSizeInfoMap = tableSizeInfoMap;
  }

  private static String getCatalog(final SqlDatabase database) {
    return (database.getSourceConfig().has(JdbcUtils.DATABASE_KEY) ? database.getSourceConfig().get(JdbcUtils.DATABASE_KEY).asText() : null);
  }

  /**
   * Looks up the first clustered-index column for the given stream via JDBC metadata.
   *
   * @return the clustered-index column name, or null if none was found (callers fall back to the
   *         primary key).
   */
  public static String discoverClusteredIndexForStream(final JdbcDatabase database,
                                                       final AirbyteStream stream) {
    Map<String, String> clusteredIndexes = new HashMap<>();
    try {
      // Get all clustered index names without specifying a table name
      clusteredIndexes = aggregateClusteredIndexes(database.bufferedResultSetQuery(
          connection -> connection.getMetaData().getIndexInfo(getCatalog(database), stream.getNamespace(), stream.getName(), false, false),
          r -> {
            if (r.getShort(JDBC_COLUMN_TYPE) == DatabaseMetaData.tableIndexClustered) {
              final String schemaName =
                  r.getObject(JDBC_COLUMN_SCHEMA_NAME) != null ? r.getString(JDBC_COLUMN_SCHEMA_NAME) : r.getString(JDBC_COLUMN_DATABASE_NAME);
              final String streamName = getFullyQualifiedTableName(schemaName, r.getString(JDBC_COLUMN_TABLE_NAME));
              final String columnName = r.getString(JDBC_COLUMN_COLUMN_NAME);
              return new ClusteredIndexAttributesFromDb(streamName, columnName);
            } else {
              return null;
            }
          }));
    } catch (final SQLException e) {
      // Best effort: discovery failure is non-fatal, the caller falls back to the primary key.
      LOGGER.debug("Could not retrieve clustered indexes without a table name ({}), not blocking, fall back to use pk.", e);
    }
    LOGGER.debug("clusteredIndexes: {}", StringUtils.join(clusteredIndexes));
    final String streamName = stream.getName();
    final String namespace = stream.getNamespace();

    return clusteredIndexes.getOrDefault(
        getFullyQualifiedTableName(namespace, streamName), null);
  }

  @VisibleForTesting
  public record ClusteredIndexAttributesFromDb(String streamName,
                                               String columnName) {}

  /**
   * Aggregate list of @param entries of StreamName and clustered index column name
   *
   * @return a map by StreamName to associated columns in clustered index. If clustered index has
   *         multiple columns, we always use the first column.
   */
  @VisibleForTesting
  static Map<String, String> aggregateClusteredIndexes(final List<ClusteredIndexAttributesFromDb> entries) {
    final Map<String, String> result = new HashMap<>();
    // putIfAbsent keeps the first column seen for each stream.
    entries.forEach(entry -> {
      if (entry != null) {
        result.putIfAbsent(entry.streamName(), entry.columnName());
      }
    });
    return result;
  }

  /**
   * Builds one iterator per INCREMENTAL stream in the catalog, optionally bracketed by
   * STARTED/COMPLETE stream-status trace iterators.
   */
  public List<AutoCloseableIterator<AirbyteMessage>> getIncrementalIterators(
                                                                             final ConfiguredAirbyteCatalog catalog,
                                                                             final Map<String, TableInfo<CommonField<JDBCType>>> tableNameToTable,
                                                                             final Instant emittedAt,
                                                                             final boolean decorateWithStartedStatus,
                                                                             final boolean decorateWithCompletedStatus,
                                                                             @NotNull final Optional<Duration> cdcInitialLoadTimeout) {
    final List<AutoCloseableIterator<AirbyteMessage>> iteratorList = new ArrayList<>();
    for (final ConfiguredAirbyteStream airbyteStream : catalog.getStreams()) {
      final AirbyteStream stream = airbyteStream.getStream();
      final String streamName = stream.getName();
      final String namespace = stream.getNamespace();
      // TODO: need to select column according to indexing status of table. may not be primary key
      final var pair = new io.airbyte.protocol.models.AirbyteStreamNameNamespacePair(streamName, namespace);
      if (airbyteStream.getSyncMode().equals(SyncMode.INCREMENTAL)) {
        final String fullyQualifiedTableName = DbSourceDiscoverUtil.getFullyQualifiedTableName(namespace, streamName);

        // Grab the selected fields to sync
        final TableInfo<CommonField<JDBCType>> table = tableNameToTable.get(fullyQualifiedTableName);
        if (decorateWithStartedStatus) {
          iteratorList.add(
              new StreamStatusTraceEmitterIterator(new AirbyteStreamStatusHolder(pair, AirbyteStreamStatusTraceMessage.AirbyteStreamStatus.STARTED)));
        }
        iteratorList.add(getIteratorForStream(airbyteStream, table, emittedAt, cdcInitialLoadTimeout));
        if (decorateWithCompletedStatus) {
          iteratorList.add(new StreamStatusTraceEmitterIterator(
              new AirbyteStreamStatusHolder(pair, AirbyteStreamStatusTraceMessage.AirbyteStreamStatus.COMPLETE)));
        }
      }
    }
    return iteratorList;
  }

  /**
   * Builds the full message iterator for a single stream: chunked ordered-column query -> record
   * message conversion -> state checkpointing -> sampled progress logging.
   */
  @NotNull
  @Override
  public AutoCloseableIterator<AirbyteMessage> getIteratorForStream(@NotNull final ConfiguredAirbyteStream airbyteStream,
                                                                    @NotNull final TableInfo<CommonField<JDBCType>> table,
                                                                    @NotNull final Instant emittedAt,
                                                                    @NotNull final Optional<Duration> cdcInitialLoadTimeout) {
    final AirbyteStream stream = airbyteStream.getStream();
    final String streamName = stream.getName();
    final String namespace = stream.getNamespace();
    final AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair(streamName, namespace);
    // Only sync the columns selected in the configured catalog.
    final List<String> selectedDatabaseFields = table.getFields()
        .stream()
        .map(CommonField::getName)
        .filter(CatalogHelpers.getTopLevelFieldNames(airbyteStream)::contains)
        .toList();
    final AutoCloseableIterator<AirbyteRecordData> queryStream =
        new MssqlInitialLoadRecordIterator(database, sourceOperations, quoteString, initialLoadStateManager, selectedDatabaseFields, pair,
            calculateChunkSize(tableSizeInfoMap.get(pair), pair), isCompositePrimaryKey(airbyteStream), emittedAt, cdcInitialLoadTimeout);
    final AutoCloseableIterator<AirbyteMessage> recordIterator =
        getRecordIterator(queryStream, streamName, namespace, emittedAt.toEpochMilli());
    final AutoCloseableIterator<AirbyteMessage> recordAndMessageIterator = augmentWithState(recordIterator, airbyteStream);
    return augmentWithLogs(recordAndMessageIterator, pair, streamName);
  }

  // Transforms the given iterator to create an {@link AirbyteRecordMessage}
  private AutoCloseableIterator<AirbyteMessage> getRecordIterator(
                                                                  final AutoCloseableIterator<AirbyteRecordData> recordIterator,
                                                                  final String streamName,
                                                                  final String namespace,
                                                                  final long emittedAt) {
    return AutoCloseableIterators.transform(recordIterator, r -> new AirbyteMessage()
        .withType(Type.RECORD)
        .withRecord(new AirbyteRecordMessage()
            .withStream(streamName)
            .withNamespace(namespace)
            .withEmittedAt(emittedAt)
            .withData(r.rawRowData())
            // Omit the meta field entirely when there are no changes to report.
            .withMeta(isMetaChangesEmptyOrNull(r.meta()) ? null : r.meta())));
  }

  private boolean isMetaChangesEmptyOrNull(final AirbyteRecordMessageMeta meta) {
    return meta == null || meta.getChanges() == null || meta.getChanges().isEmpty();
  }

  // Augments the given iterator with record count logs.
  private AutoCloseableIterator<AirbyteMessage> augmentWithLogs(final AutoCloseableIterator<AirbyteMessage> iterator,
                                                                final AirbyteStreamNameNamespacePair pair,
                                                                final String streamName) {
    final AtomicLong recordCount = new AtomicLong();
    return AutoCloseableIterators.transform(iterator,
        AirbyteStreamUtils.convertFromNameAndNamespace(pair.getName(), pair.getNamespace()),
        r -> {
          final long count = recordCount.incrementAndGet();
          if (count % RECORD_LOGGING_SAMPLE_RATE == 0) {
            LOGGER.info("Reading stream {}. Records read: {}", streamName, count);
          }
          return r;
        });
  }

  /** Wraps the record iterator with periodic state emission (by record count and by duration). */
  private AutoCloseableIterator<AirbyteMessage> augmentWithState(final AutoCloseableIterator<AirbyteMessage> recordIterator,
                                                                 final ConfiguredAirbyteStream airbyteStream) {
    final AirbyteStreamNameNamespacePair pair =
        new AirbyteStreamNameNamespacePair(airbyteStream.getStream().getName(), airbyteStream.getStream().getNamespace());

    // Checkpoint frequency is configurable; fall back to the Debezium iterator defaults.
    final Duration syncCheckpointDuration =
        config.get(SYNC_CHECKPOINT_DURATION_PROPERTY) != null
            ? Duration.ofSeconds(config.get(SYNC_CHECKPOINT_DURATION_PROPERTY).asLong())
            : DebeziumIteratorConstants.SYNC_CHECKPOINT_DURATION;
    final Long syncCheckpointRecords = config.get(SYNC_CHECKPOINT_RECORDS_PROPERTY) != null
        ? config.get(SYNC_CHECKPOINT_RECORDS_PROPERTY).asLong()
        : DebeziumIteratorConstants.SYNC_CHECKPOINT_RECORDS;

    streamStateForIncrementalRunSupplier.ifPresent(initialLoadStateManager::setStreamStateForIncrementalRunSupplier);
    return AutoCloseableIterators.transformIterator(
        r -> new SourceStateIterator<>(r, airbyteStream, initialLoadStateManager,
            new StateEmitFrequency(syncCheckpointRecords, syncCheckpointDuration)),
        recordIterator, new io.airbyte.protocol.models.AirbyteStreamNameNamespacePair(pair.getName(), pair.getNamespace()));
  }

  private static boolean isCompositePrimaryKey(final ConfiguredAirbyteStream stream) {
    return stream.getStream().getSourceDefinedPrimaryKey().size() > 1;
  }

  /**
   * Derives the per-chunk row count so that one chunk covers roughly {@link #QUERY_TARGET_SIZE_GB}
   * bytes, given the table's average row length.
   */
  public static long calculateChunkSize(final TableSizeInfo tableSizeInfo, final AirbyteStreamNameNamespacePair pair) {
    // If table size info could not be calculated, a default chunk size will be provided.
    if (tableSizeInfo == null || tableSizeInfo.tableSize() == 0 || tableSizeInfo.avgRowLength() == 0) {
      LOGGER.info("Chunk size could not be determined for pair: {}, defaulting to {} rows", pair, DEFAULT_CHUNK_SIZE);
      return DEFAULT_CHUNK_SIZE;
    }
    final long avgRowLength = tableSizeInfo.avgRowLength();
    final long chunkSize = QUERY_TARGET_SIZE_GB / avgRowLength;
    LOGGER.info("Chunk size determined for pair: {}, is {}", pair, chunkSize);
    return chunkSize;
  }

}
/*
 * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql.initialsync;

import static io.airbyte.cdk.db.DbAnalyticsUtils.cdcSnapshotForceShutdownMessage;
import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils.enquoteIdentifier;
import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils.getFullyQualifiedTableNameWithQuoting;

import com.google.common.collect.AbstractIterator;
import io.airbyte.cdk.db.JdbcCompatibleSourceOperations;
import io.airbyte.cdk.db.jdbc.AirbyteRecordData;
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
import io.airbyte.cdk.integrations.base.AirbyteTraceMessageUtility;
import io.airbyte.cdk.integrations.source.relationaldb.models.OrderedColumnLoadStatus;
import io.airbyte.commons.exceptions.TransientErrorException;
import io.airbyte.commons.util.AutoCloseableIterator;
import io.airbyte.commons.util.AutoCloseableIterators;
import io.airbyte.integrations.source.mssql.MssqlQueryUtils;
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil.OrderedColumnInfo;
import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair;
import java.sql.Connection;
import java.sql.JDBCType;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.time.Duration;
import java.time.Instant;
import java.util.List;
import java.util.Optional;
import java.util.stream.Stream;
import javax.annotation.CheckForNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Reads a stream's rows in ordered-column ("OC") chunks. Each chunk is issued as its own TOP-n
 * query ordered by the OC, resuming from the last OC value recorded in the load status; when one
 * chunk's result set is exhausted the next subquery is issued transparently. Composite-key loads
 * issue a single, unchunked query. For CDC syncs, the iterator aborts with a
 * {@link TransientErrorException} once the configured initial-load timeout elapses so CDC
 * replication can catch up on the next attempt.
 */
@SuppressWarnings("try")
public class MssqlInitialLoadRecordIterator extends AbstractIterator<AirbyteRecordData>
    implements AutoCloseableIterator<AirbyteRecordData> {

  private static final Logger LOGGER = LoggerFactory.getLogger(MssqlInitialLoadRecordIterator.class);

  // Iterator over the current chunk's result set; replaced when a new subquery is issued.
  private AutoCloseableIterator<AirbyteRecordData> currentIterator;
  private final JdbcDatabase database;
  private int numSubqueries = 0;
  private final String quoteString;
  private final JdbcCompatibleSourceOperations<JDBCType> sourceOperations;
  private final List<String> columnNames;
  private final AirbyteStreamNameNamespacePair pair;
  private final MssqlInitialLoadStateManager initialLoadStateManager;
  private final long chunkSize;
  private final OrderedColumnInfo ocInfo;
  private final boolean isCompositeKeyLoad;
  private final Instant startInstant;
  private final Optional<Duration> cdcInitialLoadTimeout;
  private final boolean isCdcSync;

  MssqlInitialLoadRecordIterator(
                                 final JdbcDatabase database,
                                 final JdbcCompatibleSourceOperations<JDBCType> sourceOperations,
                                 final String quoteString,
                                 final MssqlInitialLoadStateManager initialLoadStateManager,
                                 final List<String> columnNames,
                                 final AirbyteStreamNameNamespacePair pair,
                                 final long chunkSize,
                                 final boolean isCompositeKeyLoad,
                                 final Instant startInstant,
                                 final Optional<Duration> cdcInitialLoadTimeout) {
    this.database = database;
    this.sourceOperations = sourceOperations;
    this.quoteString = quoteString;
    this.initialLoadStateManager = initialLoadStateManager;
    this.columnNames = columnNames;
    this.pair = pair;
    this.chunkSize = chunkSize;
    this.ocInfo = initialLoadStateManager.getOrderedColumnInfo(pair);
    this.isCompositeKeyLoad = isCompositeKeyLoad;
    this.startInstant = startInstant;
    this.cdcInitialLoadTimeout = cdcInitialLoadTimeout;
    this.isCdcSync = isCdcSync(initialLoadStateManager);
  }

  @CheckForNull
  @Override
  protected AirbyteRecordData computeNext() {
    // CDC syncs give up after the configured timeout so replication can catch up; the thrown
    // transient error triggers a retried attempt that resumes the snapshot from saved state.
    if (isCdcSync && cdcInitialLoadTimeout.isPresent()
        && Duration.between(startInstant, Instant.now()).compareTo(cdcInitialLoadTimeout.get()) > 0) {
      final String cdcInitialLoadTimeoutMessage = String.format(
          "Initial load has taken longer than %s hours, Canceling sync so that CDC replication can catch-up on subsequent attempt, and then initial snapshotting will resume",
          cdcInitialLoadTimeout.get().toHours());
      LOGGER.info(cdcInitialLoadTimeoutMessage);
      AirbyteTraceMessageUtility.emitAnalyticsTrace(cdcSnapshotForceShutdownMessage());
      throw new TransientErrorException(cdcInitialLoadTimeoutMessage);
    }
    if (shouldBuildNextSubquery()) {
      try {
        // We will only issue one query for a composite key load. If we have already processed all the data
        // associated with this
        // query, we should indicate that we are done processing for the given stream.
        if (isCompositeKeyLoad && numSubqueries >= 1) {
          return endOfData();
        }
        // Previous stream (and connection) must be manually closed in this iterator.
        if (currentIterator != null) {
          currentIterator.close();
        }

        LOGGER.info("Subquery number : {}", numSubqueries);
        final Stream<AirbyteRecordData> stream = database.unsafeQuery(
            this::getOcPreparedStatement, sourceOperations::convertDatabaseRowToAirbyteRecordData);
        currentIterator = AutoCloseableIterators.fromStream(stream,
            new io.airbyte.protocol.models.AirbyteStreamNameNamespacePair(pair.getName(), pair.getNamespace()));
        numSubqueries++;
        // If the current subquery has no records associated with it, the entire stream has been read.
        if (!currentIterator.hasNext()) {
          return endOfData();
        }
      } catch (final Exception e) {
        throw new RuntimeException(e);
      }
    }
    return currentIterator.next();
  }

  private boolean shouldBuildNextSubquery() {
    // The next sub-query should be built if (i) it is the first subquery in the sequence. (ii) the
    // previous subquery has finished.
    return (currentIterator == null || !currentIterator.hasNext());
  }

  /**
   * Builds the chunk query for the current position. First chunk: plain TOP-n ordered by the OC.
   * Subsequent chunks: resume from the last-seen OC value, bounded above by the OC max captured at
   * sync start (unless the max is null, i.e. the table was empty at discovery). Composite-key
   * loads are unchunked.
   */
  private PreparedStatement getOcPreparedStatement(final Connection connection) {
    try {
      final String tableName = pair.getName();
      final String schemaName = pair.getNamespace();
      final String fullTableName = getFullyQualifiedTableNameWithQuoting(schemaName, tableName,
          quoteString);
      LOGGER.info("Preparing query for table: {}", fullTableName);
      final String wrappedColumnNames = MssqlQueryUtils.getWrappedColumnNames(database, quoteString, columnNames, schemaName, tableName);
      final OrderedColumnLoadStatus ocLoadStatus = initialLoadStateManager.getOrderedColumnLoadStatus(pair);
      if (ocLoadStatus == null) {
        final String quotedCursorField = enquoteIdentifier(ocInfo.ocFieldName(), quoteString);
        final String sql;
        if (isCompositeKeyLoad) {
          sql = "SELECT %s FROM %s ORDER BY %s".formatted(wrappedColumnNames, fullTableName, quotedCursorField);
        } else {
          sql = "SELECT TOP %s %s FROM %s ORDER BY %s".formatted(chunkSize, wrappedColumnNames, fullTableName, quotedCursorField);
        }
        final PreparedStatement preparedStatement = connection.prepareStatement(sql);
        LOGGER.info("Executing query for table {}: {}", tableName, sql);
        return preparedStatement;
      } else {
        LOGGER.info("ocLoadStatus value is : {}", ocLoadStatus.getOrderedColVal());
        final String quotedCursorField = enquoteIdentifier(ocInfo.ocFieldName(), quoteString);
        final String sql;
        if (isCompositeKeyLoad) {
          sql = "SELECT %s FROM %s WHERE %s >= ? ORDER BY %s".formatted(wrappedColumnNames, fullTableName,
              quotedCursorField, quotedCursorField);
        } else {
          // The ordered column max value could be null - this can happen in the case of empty tables. In this
          // case,
          // we can just issue a query without any chunking.
          if (ocInfo.ocMaxValue() != null) {
            sql = "SELECT TOP %s %s FROM %s WHERE %s > ? AND %s <= ? ORDER BY %s".formatted(chunkSize, wrappedColumnNames, fullTableName,
                quotedCursorField, quotedCursorField, quotedCursorField);
          } else {
            sql = "SELECT %s FROM %s WHERE %s > ? ORDER BY %s".formatted(wrappedColumnNames, fullTableName,
                quotedCursorField, quotedCursorField);
          }
        }
        final PreparedStatement preparedStatement = connection.prepareStatement(sql);
        final JDBCType cursorFieldType = ocInfo.fieldType();
        sourceOperations.setCursorField(preparedStatement, 1, cursorFieldType, ocLoadStatus.getOrderedColVal());
        if (!isCompositeKeyLoad && ocInfo.ocMaxValue() != null) {
          sourceOperations.setCursorField(preparedStatement, 2, cursorFieldType, ocInfo.ocMaxValue());
        }
        LOGGER.info("Executing query for table {}: {}", tableName, sql);
        return preparedStatement;
      }
    } catch (final SQLException e) {
      throw new RuntimeException(e);
    }
  }

  @Override
  public void close() throws Exception {
    if (currentIterator != null) {
      currentIterator.close();
    }
  }

  // A global-state manager implies a CDC sync; stream-state managers are non-CDC.
  private boolean isCdcSync(final MssqlInitialLoadStateManager initialLoadStateManager) {
    if (initialLoadStateManager instanceof MssqlInitialLoadGlobalStateManager) {
      LOGGER.info("Running a cdc sync");
      return true;
    } else {
      LOGGER.info("Not running a cdc sync");
      return false;
    }
  }

}
/*
 * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql.initialsync;

import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.cdk.integrations.source.relationaldb.models.InternalModels.StateType;
import io.airbyte.cdk.integrations.source.relationaldb.models.OrderedColumnLoadStatus;
import io.airbyte.cdk.integrations.source.relationaldb.state.SourceStateMessageProducer;
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil.OrderedColumnInfo;
import io.airbyte.protocol.models.v0.AirbyteMessage;
import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.function.Function;
import java.util.stream.Collectors;

/**
 * Base class for MSSQL initial-load state managers. Tracks per-stream progress of the
 * ordered-column ("OC") initial load and updates that progress as records flow through
 * {@link #processRecordMessage}. Subclasses decide the shape of the emitted state messages
 * (GLOBAL for CDC syncs, STREAM otherwise).
 */
public abstract class MssqlInitialLoadStateManager implements SourceStateMessageProducer<AirbyteMessage> {

  public static final long MSSQL_STATE_VERSION = 2;
  public static final String STATE_TYPE_KEY = "state_type";
  public static final String ORDERED_COL_STATE_TYPE = "ordered_column";

  // Latest ordered-column load status per stream, updated as records are processed.
  protected Map<AirbyteStreamNameNamespacePair, OrderedColumnLoadStatus> pairToOrderedColLoadStatus;
  // Ordered-column metadata (field name, type, max value) per stream.
  protected Map<AirbyteStreamNameNamespacePair, OrderedColumnInfo> pairToOrderedColInfo;

  // Supplies the stream state to embed for the incremental run that follows the initial load.
  protected Function<AirbyteStreamNameNamespacePair, JsonNode> streamStateForIncrementalRunSupplier;

  void setStreamStateForIncrementalRunSupplier(final Function<AirbyteStreamNameNamespacePair, JsonNode> streamStateForIncrementalRunSupplier) {
    this.streamStateForIncrementalRunSupplier = streamStateForIncrementalRunSupplier;
  }

  /**
   * Updates the {@link OrderedColumnLoadStatus} for the state associated with the given pair.
   *
   * @param pair pair
   * @param ocLoadStatus updated status
   */
  public void updateOrderedColumnLoadState(final AirbyteStreamNameNamespacePair pair, final OrderedColumnLoadStatus ocLoadStatus) {
    pairToOrderedColLoadStatus.put(pair, ocLoadStatus);
  }

  /**
   * Returns the previous state emitted. Represented as a {@link OrderedColumnLoadStatus} associated
   * with the stream.
   *
   * @param pair pair
   * @return load status, or null if none has been recorded for the stream
   */
  public OrderedColumnLoadStatus getOrderedColumnLoadStatus(final AirbyteStreamNameNamespacePair pair) {
    return pairToOrderedColLoadStatus.get(pair);
  }

  /**
   * Returns the current {@link OrderedColumnInfo}, associated with the stream. This includes the
   * data type and the column name associated with the stream.
   *
   * @param pair pair
   * @return ordered-column info, or null if none is known for the stream
   */
  public OrderedColumnInfo getOrderedColumnInfo(final AirbyteStreamNameNamespacePair pair) {
    return pairToOrderedColInfo.get(pair);
  }

  // Re-keys the initial load status map onto v0 protocol pairs.
  static Map<AirbyteStreamNameNamespacePair, OrderedColumnLoadStatus> initPairToOrderedColumnLoadStatusMap(
                                                                                                           final Map<io.airbyte.protocol.models.AirbyteStreamNameNamespacePair, OrderedColumnLoadStatus> pairToOcStatus) {
    return pairToOcStatus.entrySet().stream()
        .collect(Collectors.toMap(
            e -> new AirbyteStreamNameNamespacePair(e.getKey().getName(), e.getKey().getNamespace()),
            Entry::getValue));
  }

  /**
   * State for the incremental run that follows the initial load: the incremental state recorded on
   * the latest OC load status, or the supplier's value when no status (or no recorded incremental
   * state) exists yet.
   */
  protected JsonNode getIncrementalState(final AirbyteStreamNameNamespacePair pair) {
    final OrderedColumnLoadStatus currentOcLoadStatus = getOrderedColumnLoadStatus(pair);
    return (currentOcLoadStatus == null || currentOcLoadStatus.getIncrementalState() == null)
        ? streamStateForIncrementalRunSupplier.apply(pair)
        : currentOcLoadStatus.getIncrementalState();
  }

  @Override
  public AirbyteMessage processRecordMessage(final ConfiguredAirbyteStream stream, final AirbyteMessage message) {
    final AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace());
    final String ocFieldName = getOrderedColumnInfo(pair).ocFieldName();
    // Assumes the record contains the ordered column; the load queries always select it.
    final String lastOcVal = message.getRecord().getData().get(ocFieldName).asText();
    final OrderedColumnLoadStatus ocStatus = new OrderedColumnLoadStatus()
        .withVersion(MSSQL_STATE_VERSION)
        .withStateType(StateType.ORDERED_COLUMN)
        .withOrderedCol(ocFieldName)
        .withOrderedColVal(lastOcVal)
        .withIncrementalState(getIncrementalState(pair));
    updateOrderedColumnLoadState(pair, ocStatus);
    return message;
  }

  @Override
  public boolean shouldEmitStateMessage(final ConfiguredAirbyteStream stream) {
    // Only streams with a known ordered column can checkpoint meaningfully.
    return Objects.nonNull(getOrderedColumnInfo(new AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace())));
  }

}
this.pairToOrderedColInfo = pairToOrderedColInfo; + this.pairToOrderedColLoadStatus = MssqlInitialLoadStateManager.initPairToOrderedColumnLoadStatusMap(initialLoadStreams.pairToInitialLoadStatus()); + this.streamStateForIncrementalRunSupplier = pair -> Jsons.emptyObject(); + } + + @Override + public AirbyteStateMessage createFinalStateMessage(final ConfiguredAirbyteStream stream) { + AirbyteStreamNameNamespacePair pair = + new io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace()); + final JsonNode incrementalState = getIncrementalState(pair); + // If there is no incremental state, save the latest OC state + // Such as in the case of full refresh + final JsonNode finalState; + if (incrementalState == null || incrementalState.isEmpty()) { + finalState = Jsons.jsonNode(getOrderedColumnLoadStatus(pair)); + } else { + finalState = incrementalState; + } + return new AirbyteStateMessage() + .withType(AirbyteStateType.STREAM) + .withStream(getAirbyteStreamState(pair, finalState)); + } + + @Override + public AirbyteStateMessage generateStateMessageAtCheckpoint(final ConfiguredAirbyteStream stream) { + AirbyteStreamNameNamespacePair pair = + new io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace()); + var ocStatus = getOrderedColumnLoadStatus(pair); + return new AirbyteStateMessage() + .withType(AirbyteStateType.STREAM) + .withStream(getAirbyteStreamState(pair, Jsons.jsonNode(ocStatus))); + } + + protected AirbyteStreamState getAirbyteStreamState(final AirbyteStreamNameNamespacePair pair, final JsonNode stateData) { + Preconditions.checkNotNull(pair); + Preconditions.checkNotNull(pair.getName()); + Preconditions.checkNotNull(pair.getNamespace()); + LOGGER.info("State data for {}: {}", pair.getNamespace().concat("_").concat(pair.getName()), stateData); + + return new AirbyteStreamState() + .withStreamDescriptor( + new 
StreamDescriptor().withName(pair.getName()).withNamespace(pair.getNamespace())) + .withStreamState(stateData); + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialReadUtil.java b/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialReadUtil.java new file mode 100644 index 0000000000000..6679f7987b559 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialReadUtil.java @@ -0,0 +1,558 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql.initialsync; + +import static io.airbyte.cdk.db.DbAnalyticsUtils.cdcCursorInvalidMessage; +import static io.airbyte.cdk.db.DbAnalyticsUtils.cdcResyncMessage; +import static io.airbyte.cdk.db.DbAnalyticsUtils.wassOccurrenceMessage; +import static io.airbyte.integrations.source.mssql.MsSqlSpecConstants.FAIL_SYNC_OPTION; +import static io.airbyte.integrations.source.mssql.MsSqlSpecConstants.INVALID_CDC_CURSOR_POSITION_PROPERTY; +import static io.airbyte.integrations.source.mssql.MsSqlSpecConstants.RESYNC_DATA_OPTION; +import static io.airbyte.integrations.source.mssql.MssqlCdcHelper.getDebeziumProperties; +import static io.airbyte.integrations.source.mssql.MssqlQueryUtils.getTableSizeInfoForStreams; +import static io.airbyte.integrations.source.mssql.cdc.MssqlCdcStateConstants.MSSQL_CDC_OFFSET; +import static io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadHandler.discoverClusteredIndexForStream; +import static io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadStateManager.ORDERED_COL_STATE_TYPE; +import static io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadStateManager.STATE_TYPE_KEY; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.Sets; 
+import io.airbyte.cdk.db.jdbc.JdbcDatabase; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.integrations.base.AirbyteTraceMessageUtility; +import io.airbyte.cdk.integrations.debezium.AirbyteDebeziumHandler; +import io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter; +import io.airbyte.cdk.integrations.debezium.internals.RecordWaitTimeUtil; +import io.airbyte.cdk.integrations.debezium.internals.RelationalDbDebeziumEventConverter; +import io.airbyte.cdk.integrations.debezium.internals.RelationalDbDebeziumPropertiesManager; +import io.airbyte.cdk.integrations.source.relationaldb.CdcStateManager; +import io.airbyte.cdk.integrations.source.relationaldb.DbSourceDiscoverUtil; +import io.airbyte.cdk.integrations.source.relationaldb.InitialLoadTimeoutUtil; +import io.airbyte.cdk.integrations.source.relationaldb.TableInfo; +import io.airbyte.cdk.integrations.source.relationaldb.models.CdcState; +import io.airbyte.cdk.integrations.source.relationaldb.models.CursorBasedStatus; +import io.airbyte.cdk.integrations.source.relationaldb.models.OrderedColumnLoadStatus; +import io.airbyte.cdk.integrations.source.relationaldb.state.StateManager; +import io.airbyte.cdk.integrations.source.relationaldb.streamstatus.StreamStatusTraceEmitterIterator; +import io.airbyte.commons.exceptions.ConfigErrorException; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.stream.AirbyteStreamStatusHolder; +import io.airbyte.commons.util.AutoCloseableIterator; +import io.airbyte.commons.util.AutoCloseableIterators; +import io.airbyte.integrations.source.mssql.*; +import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil; +import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil.MssqlDebeziumStateAttributes; +import io.airbyte.protocol.models.CommonField; +import io.airbyte.protocol.models.v0.*; +import io.debezium.connector.sqlserver.Lsn; +import java.sql.JDBCType; +import java.time.Duration; +import java.time.Instant; 
+import java.util.*; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class MssqlInitialReadUtil { + + private static final Logger LOGGER = LoggerFactory.getLogger(MssqlInitialReadUtil.class); + private static final int MIN_QUEUE_SIZE = 1000; + private static final int MAX_QUEUE_SIZE = 10000; + + public record InitialLoadStreams(List streamsForInitialLoad, + Map pairToInitialLoadStatus) { + + } + + public record CursorBasedStreams(List streamsForCursorBased, + Map pairToCursorBasedStatus) { + + } + + public record OrderedColumnInfo(String ocFieldName, JDBCType fieldType, String ocMaxValue) {} + + public static Optional getMssqlFullRefreshInitialLoadHandler(final JdbcDatabase database, + final ConfiguredAirbyteCatalog catalog, + final MssqlInitialLoadStateManager initialLoadStateManager, + final StateManager stateManager, + final ConfiguredAirbyteStream fullRefreshStream, + final Instant emittedAt, + final String quoteString) { + final boolean savedOffsetStillPresentOnServer = isSavedOffsetStillPresentOnServer(database, catalog, stateManager); + final InitialLoadStreams initialLoadStreams = + cdcStreamsForInitialOrderedColumnLoad(stateManager.getCdcStateManager(), catalog, savedOffsetStillPresentOnServer); + + // State manager will need to know all streams in order to produce a state message + // But for initial load handler we only want to produce iterator on the single full refresh stream. 
+ if (!initialLoadStreams.streamsForInitialLoad().isEmpty()) { + // Filter on initialLoadStream + var pair = new AirbyteStreamNameNamespacePair(fullRefreshStream.getStream().getName(), fullRefreshStream.getStream().getNamespace()); + var ocStatus = initialLoadStreams.pairToInitialLoadStatus.get(pair); + Map fullRefreshOcStatus; + if (ocStatus == null) { + fullRefreshOcStatus = Map.of(); + } else { + fullRefreshOcStatus = Map.of(pair, ocStatus); + } + + var fullRefreshStreamInitialLoad = new InitialLoadStreams(List.of(fullRefreshStream), fullRefreshOcStatus); + return Optional + .of(getMssqlInitialLoadHandler(database, emittedAt, quoteString, fullRefreshStreamInitialLoad, initialLoadStateManager, Optional.empty())); + } + return Optional.empty(); + } + + private static MssqlInitialLoadHandler getMssqlInitialLoadHandler(final JdbcDatabase database, + final Instant emittedAt, + final String quoteString, + final InitialLoadStreams initialLoadStreams, + final MssqlInitialLoadStateManager initialLoadStateManager, + final Optional metadataInjector) { + final JsonNode sourceConfig = database.getSourceConfig(); + + final MssqlSourceOperations sourceOperations = new MssqlSourceOperations(metadataInjector); + + return new MssqlInitialLoadHandler(sourceConfig, database, + sourceOperations, quoteString, initialLoadStateManager, + Optional.empty(), + getTableSizeInfoForStreams(database, initialLoadStreams.streamsForInitialLoad(), quoteString)); + } + + private static CdcState getCdcState(final JdbcDatabase database, + final ConfiguredAirbyteCatalog catalog, + final StateManager stateManager, + final boolean savedOffsetStillPresentOnServer) { + if (!savedOffsetStillPresentOnServer || (stateManager.getCdcStateManager().getCdcState() == null + || stateManager.getCdcStateManager().getCdcState().getState() == null)) { + // Construct the initial state for Mssql. 
If there is already existing state, we use that instead + // since that is associated with the debezium state associated with the initial sync. + final JsonNode initialDebeziumState = MssqlDebeziumStateUtil.constructInitialDebeziumState( + getDebeziumProperties(database, catalog, false), catalog, database); + return new CdcState().withState(initialDebeziumState); + } else { + return stateManager.getCdcStateManager().getCdcState(); + } + } + + public static boolean isSavedOffsetStillPresentOnServer(final JdbcDatabase database, + final ConfiguredAirbyteCatalog catalog, + final StateManager stateManager) { + final MssqlDebeziumStateUtil mssqlDebeziumStateUtil = new MssqlDebeziumStateUtil(); + final JsonNode sourceConfig = database.getSourceConfig(); + + final JsonNode state = + (stateManager.getCdcStateManager().getCdcState() == null || stateManager.getCdcStateManager().getCdcState().getState() == null) + ? MssqlDebeziumStateUtil.constructInitialDebeziumState(getDebeziumProperties(database, catalog, false), catalog, database) + : Jsons.clone(stateManager.getCdcStateManager().getCdcState().getState()); + + final Optional savedOffset = mssqlDebeziumStateUtil.savedOffset( + getDebeziumProperties(database, catalog, true), catalog, state.get(MSSQL_CDC_OFFSET), sourceConfig); + + final boolean savedOffsetStillPresentOnServer = + savedOffset.isPresent() && mssqlDebeziumStateUtil.savedOffsetStillPresentOnServer(database, savedOffset.get()); + + if (!savedOffsetStillPresentOnServer) { + AirbyteTraceMessageUtility.emitAnalyticsTrace(cdcCursorInvalidMessage()); + if (!sourceConfig.get("replication_method").has(INVALID_CDC_CURSOR_POSITION_PROPERTY) || sourceConfig.get("replication_method").get( + INVALID_CDC_CURSOR_POSITION_PROPERTY).asText().equals(FAIL_SYNC_OPTION)) { + throw new ConfigErrorException( + "Saved offset no longer present on the server. 
Please reset the connection, and then increase binlog retention and/or increase sync frequency."); + } else if (sourceConfig.get("replication_method").get(INVALID_CDC_CURSOR_POSITION_PROPERTY).asText().equals(RESYNC_DATA_OPTION)) { + AirbyteTraceMessageUtility.emitAnalyticsTrace(cdcResyncMessage()); + LOGGER.warn("Saved offset no longer present on the server, Airbyte is going to trigger a sync from scratch"); + } + } + return savedOffsetStillPresentOnServer; + } + + public static MssqlInitialLoadGlobalStateManager getMssqlInitialLoadGlobalStateManager(final JdbcDatabase database, + final ConfiguredAirbyteCatalog catalog, + final StateManager stateManager, + final Map>> tableNameToTable, + final String quoteString) { + final boolean savedOffsetStillPresentOnServer = isSavedOffsetStillPresentOnServer(database, catalog, stateManager); + final InitialLoadStreams initialLoadStreams = + cdcStreamsForInitialOrderedColumnLoad(stateManager.getCdcStateManager(), catalog, savedOffsetStillPresentOnServer); + final CdcState initialStateToBeUsed = getCdcState(database, catalog, stateManager, savedOffsetStillPresentOnServer); + + return new MssqlInitialLoadGlobalStateManager(initialLoadStreams, + initPairToOrderedColumnInfoMap(database, catalog, tableNameToTable, quoteString), + stateManager, catalog, initialStateToBeUsed); + } + + public static List> getCdcReadIterators(final JdbcDatabase database, + final ConfiguredAirbyteCatalog catalog, + final Map>> tableNameToTable, + final StateManager stateManager, + final MssqlInitialLoadStateManager initialLoadStateManager, + final Instant emittedAt, + final String quoteString) { + final JsonNode sourceConfig = database.getSourceConfig(); + final Duration firstRecordWaitTime = RecordWaitTimeUtil.getFirstRecordWaitTime(sourceConfig); + LOGGER.info("First record waiting time: {} seconds", firstRecordWaitTime.getSeconds()); + final int queueSize = getQueueSize(sourceConfig); + LOGGER.info("Queue size: {}", queueSize); + final Duration 
initialLoadTimeout = InitialLoadTimeoutUtil.getInitialLoadTimeout(sourceConfig); + // Determine the streams that need to be loaded via primary key sync. + final List> initialLoadIterator = new ArrayList<>(); + final boolean savedOffsetStillPresentOnServer = isSavedOffsetStillPresentOnServer(database, catalog, stateManager); + final InitialLoadStreams initialLoadStreams = + cdcStreamsForInitialOrderedColumnLoad(stateManager.getCdcStateManager(), catalog, savedOffsetStillPresentOnServer); + final MssqlCdcConnectorMetadataInjector metadataInjector = MssqlCdcConnectorMetadataInjector.getInstance(emittedAt); + final CdcState stateToBeUsed = getCdcState(database, catalog, stateManager, savedOffsetStillPresentOnServer); + + // Debezium is started for streams that have been started - that is they have been partially or + // fully completed. + final var startedCdcStreamList = catalog.getStreams().stream() + .filter(stream -> stream.getSyncMode() == SyncMode.INCREMENTAL) + .filter(stream -> isStreamPartiallyOrFullyCompleted(stream, initialLoadStreams)) + .map(stream -> stream.getStream().getNamespace() + "." + stream.getStream().getName()).toList(); + + final var allCdcStreamList = catalog.getStreams().stream() + .filter(stream -> stream.getSyncMode() == SyncMode.INCREMENTAL) + .map(stream -> stream.getStream().getNamespace() + "." + stream.getStream().getName()).toList(); + + // If there are streams to sync via ordered column load, build the relevant iterators. 
+ if (!initialLoadStreams.streamsForInitialLoad().isEmpty()) { + final MssqlDebeziumStateAttributes stateAttributes = MssqlDebeziumStateUtil.getStateAttributesFromDB(database); + final MssqlInitialLoadHandler initialLoadHandler = + getMssqlInitialLoadHandler(database, emittedAt, quoteString, initialLoadStreams, initialLoadStateManager, + Optional.of(new CdcMetadataInjector(emittedAt.toString(), stateAttributes, metadataInjector))); + // Because initial load streams will be followed by cdc read of those stream, we only decorate with + // complete status trace after CDC read is done. + initialLoadIterator.addAll(initialLoadHandler.getIncrementalIterators( + new ConfiguredAirbyteCatalog().withStreams(initialLoadStreams.streamsForInitialLoad()), + tableNameToTable, + emittedAt, false, false, Optional.empty())); + } + + final List> cdcStreamsStartStatusEmitters = catalog.getStreams().stream() + .filter(stream -> stream.getSyncMode() == SyncMode.INCREMENTAL) + .map(stream -> (AutoCloseableIterator) new StreamStatusTraceEmitterIterator( + new AirbyteStreamStatusHolder( + new io.airbyte.protocol.models.AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace()), + AirbyteStreamStatusTraceMessage.AirbyteStreamStatus.STARTED))) + .toList(); + + final List> cdcStreamsEndStatusEmitters = catalog.getStreams().stream() + .filter(stream -> stream.getSyncMode() == SyncMode.INCREMENTAL) + .map(stream -> (AutoCloseableIterator) new StreamStatusTraceEmitterIterator( + new AirbyteStreamStatusHolder( + new io.airbyte.protocol.models.AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace()), + AirbyteStreamStatusTraceMessage.AirbyteStreamStatus.COMPLETE))) + .toList(); + + // Build the incremental CDC iterators. 
+ final var targetPosition = MssqlCdcTargetPosition.getTargetPosition(database, sourceConfig.get(JdbcUtils.DATABASE_KEY).asText()); + final AirbyteDebeziumHandler handler = new AirbyteDebeziumHandler<>( + sourceConfig, + targetPosition, + true, + firstRecordWaitTime, + queueSize, + false); + + final var eventConverter = new RelationalDbDebeziumEventConverter(metadataInjector, emittedAt); + + if (startedCdcStreamList.isEmpty()) { + LOGGER.info("First sync - no cdc streams have been completed or started"); + /* + * This is the first run case - no initial loads have been started. In this case, we want to run the + * iterators in the following order: 1. Run the initial load iterators. This step will timeout and + * throw a transient error if run for too long (> 8hrs by default). 2. Run the debezium iterators + * with ALL of the incremental streams configured. This is because if step 1 completes, the initial + * load can be considered finished. + */ + final var propertiesManager = + new RelationalDbDebeziumPropertiesManager(getDebeziumProperties(database, catalog, false), sourceConfig, catalog, allCdcStreamList); + final Supplier> incrementalIteratorsSupplier = getCdcIncrementalIteratorsSupplier(handler, + propertiesManager, eventConverter, stateToBeUsed, stateManager); + return Collections.singletonList( + AutoCloseableIterators.concatWithEagerClose( + Stream + .of( + cdcStreamsStartStatusEmitters, + initialLoadIterator, + Collections.singletonList(AutoCloseableIterators.lazyIterator(incrementalIteratorsSupplier, null)), + cdcStreamsEndStatusEmitters) + .flatMap(Collection::stream) + .collect(Collectors.toList()), + AirbyteTraceMessageUtility::emitStreamStatusTrace)); + } else if (initialLoadIterator.isEmpty()) { + LOGGER.info("Initial load has finished completely - only reading the binlog"); + /* + * In this case, the initial load has completed and only debezium should be run. The iterators + * should be run in the following order: 1. 
Run the debezium iterators with ALL of the incremental + * streams configured. + */ + final var propertiesManager = + new RelationalDbDebeziumPropertiesManager(getDebeziumProperties(database, catalog, false), sourceConfig, catalog, allCdcStreamList); + final Supplier> incrementalIteratorSupplier = getCdcIncrementalIteratorsSupplier(handler, + propertiesManager, eventConverter, stateToBeUsed, stateManager); + return Collections.singletonList( + AutoCloseableIterators.concatWithEagerClose( + Stream + .of( + cdcStreamsStartStatusEmitters, + Collections.singletonList(AutoCloseableIterators.lazyIterator(incrementalIteratorSupplier, null)), + cdcStreamsEndStatusEmitters) + .flatMap(Collection::stream) + .collect(Collectors.toList()), + AirbyteTraceMessageUtility::emitStreamStatusTrace)); + } else { + LOGGER.info("Initial load is in progress - reading binlog first and then resuming with initial load."); + /* + * In this case, the initial load has partially completed (WASS case). The iterators should be run + * in the following order: 1. Run the debezium iterators with only the incremental streams which + * have been fully or partially completed configured. 2. Resume initial load for partially completed + * and not started streams. This step will timeout and throw a transient error if run for too long + * (> 8hrs by default). 
+ */ + AirbyteTraceMessageUtility.emitAnalyticsTrace(wassOccurrenceMessage()); + final var propertiesManager = + new RelationalDbDebeziumPropertiesManager(getDebeziumProperties(database, catalog, false), sourceConfig, catalog, startedCdcStreamList); + final Supplier> incrementalIteratorSupplier = getCdcIncrementalIteratorsSupplier(handler, + propertiesManager, eventConverter, stateToBeUsed, stateManager); + return Collections.singletonList( + AutoCloseableIterators.concatWithEagerClose( + Stream + .of( + cdcStreamsStartStatusEmitters, + Collections.singletonList(AutoCloseableIterators.lazyIterator(incrementalIteratorSupplier, null)), + initialLoadIterator, + cdcStreamsEndStatusEmitters) + .flatMap(Collection::stream) + .collect(Collectors.toList()), + AirbyteTraceMessageUtility::emitStreamStatusTrace)); + } + } + + public static InitialLoadStreams cdcStreamsForInitialOrderedColumnLoad(final CdcStateManager stateManager, + final ConfiguredAirbyteCatalog fullCatalog, + final boolean savedOffsetStillPresentOnServer) { + if (!savedOffsetStillPresentOnServer) { + // Add a filter here to identify resumable full refresh streams. + return new InitialLoadStreams( + fullCatalog.getStreams() + .stream() + .collect(Collectors.toList()), + new HashMap<>()); + } + final AirbyteStateMessage airbyteStateMessage = stateManager.getRawStateMessage(); + final Set streamsStillInOcSync = new HashSet<>(); + + // Build a map of stream <-> initial load status for streams that currently have an initial primary + // key load in progress. + final Map pairToInitialLoadStatus = new HashMap<>(); + if (airbyteStateMessage != null && airbyteStateMessage.getGlobal() != null && airbyteStateMessage.getGlobal().getStreamStates() != null) { + LOGGER.info("Trying to extract streams need initial oc sync. 
State message: {}", airbyteStateMessage); + airbyteStateMessage.getGlobal().getStreamStates().forEach(stateMessage -> { + LOGGER.info("State message in this stream: {}", stateMessage); + final JsonNode streamState = stateMessage.getStreamState(); + final StreamDescriptor streamDescriptor = stateMessage.getStreamDescriptor(); + if (streamState == null || streamDescriptor == null) { + return; + } + + if (streamState.has(STATE_TYPE_KEY)) { + if (streamState.get(STATE_TYPE_KEY).asText().equalsIgnoreCase(ORDERED_COL_STATE_TYPE)) { + final OrderedColumnLoadStatus orderedColumnLoadStatus = Jsons.object(streamState, OrderedColumnLoadStatus.class); + final AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair(streamDescriptor.getName(), + streamDescriptor.getNamespace()); + pairToInitialLoadStatus.put(pair, orderedColumnLoadStatus); + streamsStillInOcSync.add(pair); + } + } + }); + } + + final List streamForOcSync = new ArrayList<>(); + fullCatalog.getStreams().stream() + .filter(stream -> streamsStillInOcSync.contains(AirbyteStreamNameNamespacePair.fromAirbyteStream(stream.getStream()))) + .map(Jsons::clone) + .forEach(streamForOcSync::add); + final List newlyAddedStreams = identifyStreamsToSnapshot(fullCatalog, stateManager.getInitialStreamsSynced()); + streamForOcSync.addAll(newlyAddedStreams); + + return new InitialLoadStreams(streamForOcSync, pairToInitialLoadStatus); + } + + public static Map initPairToOrderedColumnInfoMap( + final JdbcDatabase database, + final ConfiguredAirbyteCatalog catalog, + final Map>> tableNameToTable, + final String quoteString) { + final Map pairToOcInfoMap = new HashMap<>(); + // For every stream that is in initial ordered column sync, we want to maintain information about + // the current ordered column info associated with the stream + catalog.getStreams().forEach(stream -> { + final AirbyteStreamNameNamespacePair pair = + new AirbyteStreamNameNamespacePair(stream.getStream().getName(), 
stream.getStream().getNamespace()); + final Optional ocInfo = getOrderedColumnInfo(database, stream, tableNameToTable, quoteString); + if (ocInfo.isPresent()) { + pairToOcInfoMap.put(pair, ocInfo.get()); + } + }); + return pairToOcInfoMap; + } + + static Optional getOrderedColumnInfo(final JdbcDatabase database, + final ConfiguredAirbyteStream stream, + final Map>> tableNameToTable, + final String quoteString) { + final String fullyQualifiedTableName = + DbSourceDiscoverUtil.getFullyQualifiedTableName(stream.getStream().getNamespace(), stream.getStream().getName()); + final TableInfo> table = tableNameToTable + .get(fullyQualifiedTableName); + return getOrderedColumnInfo(database, stream, table, quoteString); + } + + static Optional getOrderedColumnInfo(final JdbcDatabase database, + final ConfiguredAirbyteStream stream, + final TableInfo> table, + final String quoteString) { + // For cursor-based syncs, we cannot always assume a ordered column field exists. We need to handle + // the case where it does not exist when we support cursor-based syncs. 
+ // if (stream.getStream().getSourceDefinedPrimaryKey().size() > 1) { + // LOGGER.info("Composite primary key detected for {namespace, stream} : {}, {}", + // stream.getStream().getNamespace(), stream.getStream().getName()); + // } // TODO: validate the selected column rather than primary key + final String clusterdIndexField = discoverClusteredIndexForStream(database, stream.getStream()); + final String ocFieldName; + if (clusterdIndexField != null) { + ocFieldName = clusterdIndexField; + } else { + if (stream.getStream().getSourceDefinedPrimaryKey().isEmpty()) { + return Optional.empty(); + } + ocFieldName = stream.getStream().getSourceDefinedPrimaryKey().getFirst().getFirst(); + } + + LOGGER.info("selected ordered column field name: " + ocFieldName); + final JDBCType ocFieldType = table.getFields().stream() + .filter(field -> field.getName().equals(ocFieldName)) + .findFirst().get().getType(); + final String ocMaxValue = MssqlQueryUtils.getMaxOcValueForStream(database, stream, ocFieldName, quoteString); + return Optional.of(new OrderedColumnInfo(ocFieldName, ocFieldType, ocMaxValue)); + } + + public static List identifyStreamsToSnapshot(final ConfiguredAirbyteCatalog catalog, + final Set alreadySyncedStreams) { + final Set allStreams = AirbyteStreamNameNamespacePair.fromConfiguredCatalog(catalog); + final Set newlyAddedStreams = new HashSet<>(Sets.difference(allStreams, alreadySyncedStreams)); + // Add a filter here to identify resumable full refresh streams. 
+ return catalog.getStreams().stream() + .filter(stream -> newlyAddedStreams.contains(AirbyteStreamNameNamespacePair.fromAirbyteStream(stream.getStream()))) + .map(Jsons::clone) + .collect(Collectors.toList()); + } + + public static InitialLoadStreams streamsForInitialOrderedColumnLoad(final StateManager stateManager, + final ConfiguredAirbyteCatalog fullCatalog) { + + final List rawStateMessages = stateManager.getRawStateMessages(); + final Set streamsStillInOrderedColumnSync = new HashSet<>(); + final Set alreadySeenStreamPairs = new HashSet<>(); + + // Build a map of stream <-> initial load status for streams that currently have an initial primary + // key load in progress. + final Map pairToInitialLoadStatus = new HashMap<>(); + LOGGER.info("raw state message: " + rawStateMessages); + if (rawStateMessages != null) { + rawStateMessages.forEach(stateMessage -> { + final AirbyteStreamState stream = stateMessage.getStream(); + final JsonNode streamState = stream.getStreamState(); + final StreamDescriptor streamDescriptor = stateMessage.getStream().getStreamDescriptor(); + if (streamState == null || streamDescriptor == null) { + return; + } + + final AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair(streamDescriptor.getName(), + streamDescriptor.getNamespace()); + + // Build a map of stream <-> initial load status for streams that currently have an initial primary + // key load in progress. 
+ + if (streamState.has(STATE_TYPE_KEY)) { + if (streamState.get(STATE_TYPE_KEY).asText().equalsIgnoreCase(ORDERED_COL_STATE_TYPE)) { + final OrderedColumnLoadStatus orderedColumnLoadStatus = Jsons.object(streamState, OrderedColumnLoadStatus.class); + pairToInitialLoadStatus.put(pair, orderedColumnLoadStatus); + streamsStillInOrderedColumnSync.add(pair); + } + } + alreadySeenStreamPairs.add(new AirbyteStreamNameNamespacePair(streamDescriptor.getName(), streamDescriptor.getNamespace())); + }); + } + final List streamsForOcSync = new ArrayList<>(); + LOGGER.info("alreadySeenStreamPairs: {}", alreadySeenStreamPairs); + fullCatalog.getStreams().stream() + .filter(stream -> streamsStillInOrderedColumnSync.contains(AirbyteStreamNameNamespacePair.fromAirbyteStream(stream.getStream()))) + .map(Jsons::clone) + .forEach(streamsForOcSync::add); + + final List newlyAddedStreams = identifyStreamsToSnapshot(fullCatalog, + Collections.unmodifiableSet(alreadySeenStreamPairs)); + streamsForOcSync.addAll(newlyAddedStreams); + LOGGER.info("streamsForOcSync: {}", streamsForOcSync); + return new InitialLoadStreams(streamsForOcSync.stream().filter((stream) -> !stream.getStream().getSourceDefinedPrimaryKey() + .isEmpty()).collect(Collectors.toList()), + pairToInitialLoadStatus); + } + + private static OptionalInt extractQueueSizeFromConfig(final JsonNode config) { + final JsonNode replicationMethod = config.get("replication_method"); + if (replicationMethod != null && replicationMethod.has("queue_size")) { + final int queueSize = config.get("replication_method").get("queue_size").asInt(); + return OptionalInt.of(queueSize); + } + return OptionalInt.empty(); + } + + @SuppressWarnings("unchecked") + private static Supplier> getCdcIncrementalIteratorsSupplier(AirbyteDebeziumHandler handler, + RelationalDbDebeziumPropertiesManager propertiesManager, + DebeziumEventConverter eventConverter, + CdcState stateToBeUsed, + StateManager stateManager) { + return () -> 
handler.getIncrementalIterators( + propertiesManager, eventConverter, new MssqlCdcSavedInfoFetcher(stateToBeUsed), new MssqlCdcStateHandler(stateManager)); + } + + private static boolean isStreamPartiallyOrFullyCompleted(ConfiguredAirbyteStream stream, InitialLoadStreams initialLoadStreams) { + boolean isStreamCompleted = !initialLoadStreams.streamsForInitialLoad.contains(stream); + // A stream has been partially completed if an initial load status exists. + boolean isStreamPartiallyCompleted = (initialLoadStreams.pairToInitialLoadStatus + .get(new AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace()))) != null; + return isStreamCompleted || isStreamPartiallyCompleted; + } + + public static int getQueueSize(final JsonNode config) { + final OptionalInt sizeFromConfig = extractQueueSizeFromConfig(config); + if (sizeFromConfig.isPresent()) { + final int size = sizeFromConfig.getAsInt(); + if (size < MIN_QUEUE_SIZE) { + LOGGER.warn("Queue size is overridden to {} , which is the min allowed for safety.", + MIN_QUEUE_SIZE); + return MIN_QUEUE_SIZE; + } else if (size > MAX_QUEUE_SIZE) { + LOGGER.warn("Queue size is overridden to {} , which is the max allowed for safety.", + MAX_QUEUE_SIZE); + return MAX_QUEUE_SIZE; + } + return size; + } + return MAX_QUEUE_SIZE; + } + + public static InitialLoadStreams filterStreamInIncrementalMode(final InitialLoadStreams stream) { + return new InitialLoadStreams( + stream.streamsForInitialLoad.stream().filter(airbyteStream -> airbyteStream.getSyncMode() == SyncMode.INCREMENTAL) + .collect(Collectors.toList()), + stream.pairToInitialLoadStatus); + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/main/resources/spec.json b/airbyte-integrations/connectors/source-mssql/src.bak/main/resources/spec.json new file mode 100644 index 0000000000000..d2c2b2b0f4234 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/main/resources/spec.json @@ -0,0 +1,188 @@ +{ + 
"documentationUrl": "https://docs.airbyte.com/integrations/destinations/mssql", + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "MSSQL Source Spec", + "type": "object", + "required": ["host", "port", "database", "username", "password"], + "properties": { + "host": { + "description": "The hostname of the database.", + "title": "Host", + "type": "string", + "order": 0 + }, + "port": { + "description": "The port of the database.", + "title": "Port", + "type": "integer", + "minimum": 0, + "maximum": 65536, + "examples": ["1433"], + "order": 1 + }, + "database": { + "description": "The name of the database.", + "title": "Database", + "type": "string", + "examples": ["master"], + "order": 2 + }, + "schemas": { + "title": "Schemas", + "description": "The list of schemas to sync from. Defaults to user. Case sensitive.", + "type": "array", + "items": { + "type": "string" + }, + "minItems": 0, + "uniqueItems": true, + "default": ["dbo"], + "order": 3 + }, + "username": { + "description": "The username which is used to access the database.", + "title": "Username", + "type": "string", + "order": 4 + }, + "password": { + "description": "The password associated with the username.", + "title": "Password", + "type": "string", + "airbyte_secret": true, + "order": 5 + }, + "jdbc_url_params": { + "title": "JDBC URL Params", + "description": "Additional properties to pass to the JDBC URL string when connecting to the database formatted as 'key=value' pairs separated by the symbol '&'. 
(example: key1=value1&key2=value2&key3=value3).", + "type": "string", + "order": 6 + }, + "ssl_method": { + "title": "SSL Method", + "type": "object", + "description": "The encryption method which is used when communicating with the database.", + "order": 7, + "oneOf": [ + { + "title": "Unencrypted", + "description": "Data transfer will not be encrypted.", + "required": ["ssl_method"], + "properties": { + "ssl_method": { + "type": "string", + "const": "unencrypted" + } + } + }, + { + "title": "Encrypted (trust server certificate)", + "description": "Use the certificate provided by the server without verification. (For testing purposes only!)", + "required": ["ssl_method"], + "properties": { + "ssl_method": { + "type": "string", + "const": "encrypted_trust_server_certificate" + } + } + }, + { + "title": "Encrypted (verify certificate)", + "description": "Verify and use the certificate provided by the server.", + "required": ["ssl_method"], + "properties": { + "ssl_method": { + "type": "string", + "const": "encrypted_verify_certificate" + }, + "hostNameInCertificate": { + "title": "Host Name In Certificate", + "type": "string", + "description": "Specifies the host name of the server. The value of this property must match the subject property of the certificate.", + "order": 0 + }, + "certificate": { + "title": "Certificate", + "type": "string", + "description": "certificate of the server, or of the CA that signed the server certificate", + "order": 1, + "airbyte_secret": true, + "multiline": true + } + } + } + ] + }, + "replication_method": { + "type": "object", + "title": "Update Method", + "description": "Configures how data is extracted from the database.", + "default": "CDC", + "display_type": "radio", + "order": 8, + "oneOf": [ + { + "title": "Read Changes using Change Data Capture (CDC)", + "description": "Recommended - Incrementally reads new inserts, updates, and deletes using the SQL Server's change data capture feature. 
This must be enabled on your database.", + "required": ["method"], + "properties": { + "method": { + "type": "string", + "const": "CDC", + "order": 0 + }, + "initial_waiting_seconds": { + "type": "integer", + "title": "Initial Waiting Time in Seconds (Advanced)", + "description": "The amount of time the connector will wait when it launches to determine if there is new data to sync or not. Defaults to 300 seconds. Valid range: 120 seconds to 3600 seconds. Read about initial waiting time.", + "default": 300, + "min": 120, + "max": 3600, + "order": 3 + }, + "invalid_cdc_cursor_position_behavior": { + "type": "string", + "title": "Invalid CDC position behavior (Advanced)", + "description": "Determines whether Airbyte should fail or re-sync data in case of an stale/invalid cursor value into the WAL. If 'Fail sync' is chosen, a user will have to manually reset the connection before being able to continue syncing data. If 'Re-sync data' is chosen, Airbyte will automatically trigger a refresh but could lead to higher cloud costs and data loss.", + "enum": ["Fail sync", "Re-sync data"], + "default": "Fail sync", + "order": 4 + }, + "queue_size": { + "type": "integer", + "title": "Size of the queue (Advanced)", + "description": "The size of the internal queue. This may interfere with memory consumption and efficiency of the connector, please be careful.", + "default": 10000, + "order": 5, + "min": 1000, + "max": 10000 + }, + "initial_load_timeout_hours": { + "type": "integer", + "title": "Initial Load Timeout in Hours (Advanced)", + "description": "The amount of time an initial load is allowed to continue for before catching up on CDC logs.", + "default": 8, + "min": 4, + "max": 24, + "order": 6 + } + } + }, + { + "title": "Scan Changes with User Defined Cursor", + "description": "Incrementally detects new inserts and updates using the cursor column chosen when configuring a connection (e.g. 
created_at, updated_at).", + "required": ["method"], + "properties": { + "method": { + "type": "string", + "const": "STANDARD", + "order": 0 + } + } + } + ] + } + } + } +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/AbstractMssqlSourceDatatypeTest.java b/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/AbstractMssqlSourceDatatypeTest.java new file mode 100644 index 0000000000000..462583b9a2659 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/AbstractMssqlSourceDatatypeTest.java @@ -0,0 +1,359 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql; + +import io.airbyte.cdk.integrations.standardtest.source.AbstractSourceDatabaseTypeTest; +import io.airbyte.cdk.integrations.standardtest.source.TestDataHolder; +import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; +import io.airbyte.protocol.models.JsonSchemaType; + +public abstract class AbstractMssqlSourceDatatypeTest extends AbstractSourceDatabaseTypeTest { + + protected MsSQLTestDatabase testdb; + + @Override + protected String getNameSpace() { + return "dbo"; + } + + @Override + protected String getImageName() { + return "airbyte/source-mssql:dev"; + } + + @Override + protected void tearDown(final TestDestinationEnv testEnv) { + testdb.close(); + } + + protected static final String CREATE_TABLE_SQL = "CREATE TABLE %1$s(%2$s INTEGER PRIMARY KEY, %3$s %4$s)"; + + @Override + protected void initTests() { + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("bigint") + .airbyteType(JsonSchemaType.INTEGER) + .addInsertValues("-9223372036854775808", "9223372036854775807", "0", "null") + .addExpectedValues("-9223372036854775808", "9223372036854775807", "0", null) + 
.createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("int") + .airbyteType(JsonSchemaType.INTEGER) + .addInsertValues("null", "-2147483648", "2147483647") + .addExpectedValues(null, "-2147483648", "2147483647") + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("smallint") + .airbyteType(JsonSchemaType.INTEGER) + .addInsertValues("null", "-32768", "32767") + .addExpectedValues(null, "-32768", "32767") + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("tinyint") + .airbyteType(JsonSchemaType.INTEGER) + .addInsertValues("null", "0", "255") + .addExpectedValues(null, "0", "255") + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("bit") + .airbyteType(JsonSchemaType.BOOLEAN) + .addInsertValues("null", "0", "1", "'true'", "'false'") + .addExpectedValues(null, "false", "true", "true", "false") + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("decimal") + .fullSourceDataType("DECIMAL(5,2)") + .airbyteType(JsonSchemaType.NUMBER) + .addInsertValues("999.33", "null") + .addExpectedValues("999.33", null) + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("numeric") + .airbyteType(JsonSchemaType.NUMBER) + .addInsertValues("'99999'", "null") + .addExpectedValues("99999", null) + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("money") + .airbyteType(JsonSchemaType.NUMBER) + .addInsertValues("null", "'9990000.3647'") + .addExpectedValues(null, "9990000.3647") + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + 
.sourceType("smallmoney") + .airbyteType(JsonSchemaType.NUMBER) + .addInsertValues("null", "'-214748.3648'", "214748.3647") + .addExpectedValues(null, "-214748.3648", "214748.3647") + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("float") + .airbyteType(JsonSchemaType.NUMBER) + .addInsertValues("'123'", "'1234567890.1234567'", "null") + .addExpectedValues("123.0", "1.2345678901234567E9", null) + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + addDataTypeTestData(TestDataHolder.builder() + .sourceType("real") + .airbyteType(JsonSchemaType.NUMBER) + .addInsertValues("'123'", "'1234567890.1234567'", "null") + .addExpectedValues("123.0", "1.234568E9", null) + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("date") + .airbyteType(JsonSchemaType.STRING_DATE) + .addInsertValues("'0001-01-01'", "'9999-12-31'", "'1999-01-08'", "null") + .addExpectedValues("0001-01-01", "9999-12-31", "1999-01-08", null) + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("smalldatetime") + .airbyteType(JsonSchemaType.STRING_TIMESTAMP_WITHOUT_TIMEZONE) + .addInsertValues("'1900-01-01'", "'2079-06-06'", "null") + .addExpectedValues("1900-01-01T00:00:00.000000", "2079-06-06T00:00:00.000000", null) + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("datetime") + .airbyteType(JsonSchemaType.STRING_TIMESTAMP_WITHOUT_TIMEZONE) + .addInsertValues("'1753-01-01'", "'9999-12-31'", "'9999-12-31T13:00:04'", + "'9999-12-31T13:00:04.123'", "null") + .addExpectedValues("1753-01-01T00:00:00.000000", "9999-12-31T00:00:00.000000", "9999-12-31T13:00:04.000000", + "9999-12-31T13:00:04.123000", null) + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + 
.sourceType("datetime2") + .airbyteType(JsonSchemaType.STRING_TIMESTAMP_WITHOUT_TIMEZONE) + .addInsertValues("'0001-01-01'", "'9999-12-31'", "'9999-12-31T13:00:04.123456'", "null", "'2023-11-08T01:20:11.3733338'") + .addExpectedValues("0001-01-01T00:00:00.000000", "9999-12-31T00:00:00.000000", "9999-12-31T13:00:04.123456", null, + "2023-11-08T01:20:11.373333") + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("time") + .airbyteType(JsonSchemaType.STRING_TIME_WITHOUT_TIMEZONE) + .addInsertValues("null", "'13:00:01'", "'13:00:04Z'", "'13:00:04.123456Z'") + .addExpectedValues(null, "13:00:01", "13:00:04", "13:00:04.123456") + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("datetimeoffset") + .airbyteType(JsonSchemaType.STRING_TIMESTAMP_WITH_TIMEZONE) + .addInsertValues("'2001-01-10 00:00:00 +01:00'", "'9999-01-10 00:00:00 +01:00'", "null", "'2024-05-10 19:00:01.604805 +03:00'", + "'2024-03-02 19:08:07.1234567 +09:00'", "'2024-03-02 19:08:07.12345678 +09:00'") + .addExpectedValues("2001-01-10T00:00:00.000000+01:00", + "9999-01-10T00:00:00.000000+01:00", null, "2024-05-10T19:00:01.604805+03:00", "2024-03-02T19:08:07.123456+09:00", + "2024-03-02T19:08:07.123456+09:00") + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("char") + .airbyteType(JsonSchemaType.STRING) + .addInsertValues("'a'", "'*'", "null") + .addExpectedValues("a", "*", null) + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("varchar") + .fullSourceDataType("varchar(max) COLLATE Latin1_General_100_CI_AI_SC_UTF8") + .airbyteType(JsonSchemaType.STRING) + .addInsertValues("'a'", "'abc'", "N'Миші йдуть на південь, не питай чому;'", "N'櫻花分店'", + "''", "null", "N'\\xF0\\x9F\\x9A\\x80'") + .addExpectedValues("a", "abc", "Миші 
йдуть на південь, не питай чому;", "櫻花分店", "", + null, "\\xF0\\x9F\\x9A\\x80") + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("text") + .airbyteType(JsonSchemaType.STRING) + .addInsertValues("'a'", "'abc'", "'Some test text 123$%^&*()_'", "''", "null") + .addExpectedValues("a", "abc", "Some test text 123$%^&*()_", "", null) + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("nchar") + .airbyteType(JsonSchemaType.STRING) + .addInsertValues("'a'", "'*'", "N'ї'", "null") + .addExpectedValues("a", "*", "ї", null) + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("nvarchar") + .fullSourceDataType("nvarchar(max)") + .airbyteType(JsonSchemaType.STRING) + .addInsertValues("'a'", "'abc'", "N'Миші йдуть на південь, не питай чому;'", "N'櫻花分店'", + "''", "null", "N'\\xF0\\x9F\\x9A\\x80'") + .addExpectedValues("a", "abc", "Миші йдуть на південь, не питай чому;", "櫻花分店", "", + null, "\\xF0\\x9F\\x9A\\x80") + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("ntext") + .airbyteType(JsonSchemaType.STRING) + .addInsertValues("'a'", "'abc'", "N'Миші йдуть на південь, не питай чому;'", "N'櫻花分店'", + "''", "null", "N'\\xF0\\x9F\\x9A\\x80'") + .addExpectedValues("a", "abc", "Миші йдуть на південь, не питай чому;", "櫻花分店", "", + null, "\\xF0\\x9F\\x9A\\x80") + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("binary") + .airbyteType(JsonSchemaType.STRING_BASE_64) + .addInsertValues("CAST( 'A' AS BINARY(1))", "null") + .addExpectedValues("QQ==", null) + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("varbinary") + .fullSourceDataType("varbinary(3)") + 
.airbyteType(JsonSchemaType.STRING_BASE_64) + .addInsertValues("CAST( 'ABC' AS VARBINARY)", "null") + .addExpectedValues("QUJD", null) + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + // Proper select query example: SELECT test_column.STAsText() from dbo_1_geometry; + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("geometry") + .airbyteType(JsonSchemaType.STRING) + .addInsertValues("geometry::STGeomFromText('LINESTRING (100 100, 20 180, 180 180)', 0)", + "null") + .addExpectedValues("LINESTRING(100 100, 20 180, 180 180)", null) + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("uniqueidentifier") + .airbyteType(JsonSchemaType.STRING) + .addInsertValues("'375CFC44-CAE3-4E43-8083-821D2DF0E626'", "null") + .addExpectedValues("375CFC44-CAE3-4E43-8083-821D2DF0E626", null) + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("xml") + .airbyteType(JsonSchemaType.STRING) + .addInsertValues( + "'1'", "null", "''") + .addExpectedValues("1", null, "") + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + // Proper select query example: SELECT test_column.STAsText() from dbo_1_geography; + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("geography") + .airbyteType(JsonSchemaType.STRING) + .addInsertValues( + "geography::STGeomFromText('LINESTRING(-122.360 47.656, -122.343 47.656 )', 4326)", + "null") + .addExpectedValues("LINESTRING(-122.36 47.656, -122.343 47.656)", null) + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + // sql_variant is not supported by debezium, always getting null. So only works for regular sync. + // The hierarchyid is returned in binary state, but mssql doesn't provide any parcers for it. + // On a regular sync we do a pre-flight request and then do additional wrap to sql query in case + // if we have hierarchyid. 
But this option is not available as we use a third-party tool "Debezium" + // as a CDC client. + if (this instanceof MssqlSourceDatatypeTest) { + // create table dbo_1_hierarchyid1 (test_column hierarchyid); + // insert dbo_1_hierarchyid1 values ('/1/1/'); + // select test_column ,test_column.ToString() AS [Node Text],test_column.GetLevel() [Node Level] + // from dbo_1_hierarchyid1; + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("hierarchyid") + .airbyteType(JsonSchemaType.STRING) + .addInsertValues("'/1/1/'", "null") + .addExpectedValues("/1/1/", null) + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("sql_variant") + .airbyteType(JsonSchemaType.STRING) + .addInsertValues("'a'", "'abc'", "N'Миші йдуть на південь, не питай чому;'", "N'櫻花分店'", + "''", "null", "N'\\xF0\\x9F\\x9A\\x80'") + .addExpectedValues("a", "abc", "Миші йдуть на південь, не питай чому;", "櫻花分店", "", + null, "\\xF0\\x9F\\x9A\\x80") + .createTablePatternSql(CREATE_TABLE_SQL) + .build()); + + } + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("int") + .airbyteType(JsonSchemaType.INTEGER) + .addInsertValues("null", "1234", "7878") + .addExpectedValues(null, "1234", "7878") + .createTablePatternSql("CREATE TABLE %1$s(%2$s INTEGER NULL DEFAULT ((7878)), %3$s %4$s)") + .build()); + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/AbstractSshMssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/AbstractSshMssqlSourceAcceptanceTest.java new file mode 100644 index 0000000000000..41242ca839753 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/AbstractSshMssqlSourceAcceptanceTest.java @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all 
rights reserved. + */ + +package io.airbyte.integrations.source.mssql; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.Lists; +import io.airbyte.cdk.db.Database; +import io.airbyte.cdk.db.factory.DSLContextFactory; +import io.airbyte.cdk.db.factory.DatabaseDriver; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.integrations.base.ssh.SshBastionContainer; +import io.airbyte.cdk.integrations.base.ssh.SshHelpers; +import io.airbyte.cdk.integrations.base.ssh.SshTunnel; +import io.airbyte.cdk.integrations.standardtest.source.SourceAcceptanceTest; +import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; +import io.airbyte.commons.functional.CheckedFunction; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage; +import io.airbyte.protocol.models.Field; +import io.airbyte.protocol.models.JsonSchemaType; +import io.airbyte.protocol.models.v0.CatalogHelpers; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.v0.ConnectorSpecification; +import io.airbyte.protocol.models.v0.DestinationSyncMode; +import io.airbyte.protocol.models.v0.SyncMode; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.HashMap; +import java.util.List; +import org.jooq.SQLDialect; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public abstract class AbstractSshMssqlSourceAcceptanceTest extends SourceAcceptanceTest { + + static private final Logger LOGGER = LoggerFactory.getLogger(AbstractSshMssqlSourceAcceptanceTest.class); + + private static final String SCHEMA_NAME = "dbo"; + private static final String STREAM_NAME = "id_and_name"; + private static final String STREAM_NAME2 = "starships"; + + public abstract SshTunnel.TunnelMethod getTunnelMethod(); + + private final SshBastionContainer bastion = new 
SshBastionContainer(); + private MsSQLTestDatabase testdb; + + @Override + protected JsonNode getConfig() { + try { + return testdb.integrationTestConfigBuilder() + .withoutSsl() + .with("tunnel_method", bastion.getTunnelMethod(getTunnelMethod(), true)) + .build(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + + private void populateDatabaseTestData() throws Exception { + final var outerConfig = testdb.integrationTestConfigBuilder() + .withSchemas("public") + .withoutSsl() + .with("tunnel_method", bastion.getTunnelMethod(getTunnelMethod(), false)) + .build(); + SshTunnel.sshWrap( + outerConfig, + JdbcUtils.HOST_LIST_KEY, + JdbcUtils.PORT_LIST_KEY, + (CheckedFunction, Exception>) mangledConfig -> getDatabaseFromConfig(mangledConfig) + .query(ctx -> { + ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200), born DATETIMEOFFSET(7));"); + ctx.fetch("INSERT INTO id_and_name (id, name, born) VALUES " + + "(1, 'picard', '2124-03-04T01:01:01Z'), " + + "(2, 'crusher', '2124-03-04T01:01:01Z'), " + + "(3, 'vash', '2124-03-04T01:01:01Z');"); + return null; + })); + } + + private static Database getDatabaseFromConfig(final JsonNode config) { + return new Database( + DSLContextFactory.create( + config.get(JdbcUtils.USERNAME_KEY).asText(), + config.get(JdbcUtils.PASSWORD_KEY).asText(), + DatabaseDriver.MSSQLSERVER.getDriverClassName(), + String.format(DatabaseDriver.MSSQLSERVER.getUrlFormatString(), + config.get(JdbcUtils.HOST_KEY).asText(), + config.get(JdbcUtils.PORT_KEY).asInt(), + config.get(JdbcUtils.DATABASE_KEY).asText()) + ";encrypt=false;trustServerCertificate=true", + SQLDialect.DEFAULT)); + } + + @Override + protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { + testdb = MsSQLTestDatabase.in(BaseImage.MSSQL_2022); + LOGGER.info("starting bastion"); + bastion.initAndStartBastion(testdb.getContainer().getNetwork()); + 
LOGGER.info("bastion started"); + populateDatabaseTestData(); + } + + @Override + protected void tearDown(final TestDestinationEnv testEnv) { + bastion.stopAndClose(); + } + + @Override + protected String getImageName() { + return "airbyte/source-mssql:dev"; + } + + @Override + protected ConnectorSpecification getSpec() throws Exception { + return SshHelpers.getSpecAndInjectSsh(); + } + + @Override + protected ConfiguredAirbyteCatalog getConfiguredCatalog() { + return new ConfiguredAirbyteCatalog().withStreams(Lists.newArrayList( + new ConfiguredAirbyteStream() + .withSyncMode(SyncMode.INCREMENTAL) + .withCursorField(Lists.newArrayList("id")) + .withDestinationSyncMode(DestinationSyncMode.APPEND) + .withStream(CatalogHelpers.createAirbyteStream( + STREAM_NAME, SCHEMA_NAME, + Field.of("id", JsonSchemaType.NUMBER), + Field.of("name", JsonSchemaType.STRING)) + .withSupportedSyncModes( + Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))), + new ConfiguredAirbyteStream() + .withSyncMode(SyncMode.INCREMENTAL) + .withCursorField(Lists.newArrayList("id")) + .withDestinationSyncMode(DestinationSyncMode.APPEND) + .withStream(CatalogHelpers.createAirbyteStream( + STREAM_NAME2, SCHEMA_NAME, + Field.of("id", JsonSchemaType.NUMBER), + Field.of("name", JsonSchemaType.STRING)) + .withSupportedSyncModes( + Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))))); + } + + @Override + protected JsonNode getState() { + return Jsons.jsonNode(new HashMap<>()); + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceAcceptanceTest.java new file mode 100644 index 0000000000000..e9179d2b8b951 --- /dev/null +++ 
b/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceAcceptanceTest.java @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql; + +import static io.airbyte.protocol.models.v0.SyncMode.FULL_REFRESH; +import static io.airbyte.protocol.models.v0.SyncMode.INCREMENTAL; +import static org.junit.jupiter.api.Assertions.*; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import io.airbyte.cdk.integrations.base.ssh.SshHelpers; +import io.airbyte.cdk.integrations.standardtest.source.SourceAcceptanceTest; +import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage; +import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.ContainerModifier; +import io.airbyte.protocol.models.Field; +import io.airbyte.protocol.models.JsonSchemaType; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.AirbyteRecordMessage; +import io.airbyte.protocol.models.v0.AirbyteStateMessage; +import io.airbyte.protocol.models.v0.AirbyteStreamState; +import io.airbyte.protocol.models.v0.CatalogHelpers; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.v0.ConnectorSpecification; +import io.airbyte.protocol.models.v0.DestinationSyncMode; +import io.airbyte.protocol.models.v0.SyncMode; +import java.util.List; +import java.util.Objects; +import java.util.stream.Collectors; +import org.junit.Assert; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import 
org.junit.jupiter.api.TestInstance.Lifecycle; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; + +@TestInstance(Lifecycle.PER_METHOD) +@Execution(ExecutionMode.CONCURRENT) +public class CdcMssqlSourceAcceptanceTest extends SourceAcceptanceTest { + + private static final String SCHEMA_NAME = "dbo"; + private static final String STREAM_NAME = "id_and_name"; + private static final String STREAM_NAME2 = "starships"; + private static final String CDC_ROLE_NAME = "cdc_selector"; + private static final String STREAM_NAME3 = "stream3"; + + private MsSQLTestDatabase testdb; + + @Override + protected String getImageName() { + return "airbyte/source-mssql:dev"; + } + + @Override + protected ConnectorSpecification getSpec() throws Exception { + return SshHelpers.getSpecAndInjectSsh(); + } + + @Override + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withCdcReplication() + .withoutSsl() + .build(); + } + + @Override + protected ConfiguredAirbyteCatalog getConfiguredCatalog() { + return new ConfiguredAirbyteCatalog().withStreams(getConfiguredAirbyteStreams()); + } + + protected List getConfiguredAirbyteStreams() { + return Lists.newArrayList( + new ConfiguredAirbyteStream() + .withSyncMode(SyncMode.INCREMENTAL) + .withDestinationSyncMode(DestinationSyncMode.APPEND) + .withStream(CatalogHelpers.createAirbyteStream( + STREAM_NAME, SCHEMA_NAME, + Field.of("id", JsonSchemaType.NUMBER), + Field.of("name", JsonSchemaType.STRING)) + .withSourceDefinedCursor(true) + .withSourceDefinedPrimaryKey(List.of(List.of("id"))) + .withSupportedSyncModes( + Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))), + new ConfiguredAirbyteStream() + .withSyncMode(SyncMode.INCREMENTAL) + .withDestinationSyncMode(DestinationSyncMode.APPEND) + .withStream(CatalogHelpers.createAirbyteStream( + STREAM_NAME2, SCHEMA_NAME, + Field.of("id", JsonSchemaType.NUMBER), + Field.of("name", JsonSchemaType.STRING)) + 
.withSourceDefinedCursor(true) + .withSourceDefinedPrimaryKey(List.of(List.of("id"))) + .withSupportedSyncModes( + Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)))); + } + + @Override + protected JsonNode getState() { + return null; + } + + @Override + protected void setupEnvironment(final TestDestinationEnv environment) { + testdb = MsSQLTestDatabase.in(BaseImage.MSSQL_2022, ContainerModifier.AGENT); + testdb + .withWaitUntilAgentRunning() + .withCdc() + // create tables + .with("CREATE TABLE %s.%s(id INTEGER PRIMARY KEY, name VARCHAR(200));", SCHEMA_NAME, STREAM_NAME) + .with("CREATE TABLE %s.%s(id INTEGER PRIMARY KEY, name VARCHAR(200));", SCHEMA_NAME, STREAM_NAME2) + .with("CREATE TABLE %s.%s (id INTEGER PRIMARY KEY, name VARCHAR(200), userid INTEGER DEFAULT NULL);", SCHEMA_NAME, STREAM_NAME3) + // populate tables + .with("INSERT INTO %s.%s (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');", SCHEMA_NAME, STREAM_NAME) + .with("INSERT INTO %s.%s (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');", SCHEMA_NAME, STREAM_NAME2) + .with("INSERT INTO %s.%s (id, name) VALUES (4,'voyager');", SCHEMA_NAME, STREAM_NAME3) + // enable cdc on tables for designated role + .withCdcForTable(SCHEMA_NAME, STREAM_NAME, CDC_ROLE_NAME) + .withCdcForTable(SCHEMA_NAME, STREAM_NAME2, CDC_ROLE_NAME) + .withCdcForTable(SCHEMA_NAME, STREAM_NAME3, CDC_ROLE_NAME) + // revoke user permissions + .with("REVOKE ALL FROM %s CASCADE;", testdb.getUserName()) + .with("EXEC sp_msforeachtable \"REVOKE ALL ON '?' 
TO %s;\"", testdb.getUserName()) + // grant user permissions + .with("EXEC sp_addrolemember N'%s', N'%s';", "db_datareader", testdb.getUserName()) + .with("GRANT SELECT ON SCHEMA :: [cdc] TO %s", testdb.getUserName()) + .with("EXEC sp_addrolemember N'%s', N'%s';", CDC_ROLE_NAME, testdb.getUserName()) + .withWaitUntilMaxLsnAvailable(); + } + + @Override + protected void tearDown(final TestDestinationEnv testEnv) { + testdb.close(); + } + + @Test + void testAddNewStreamToExistingSync() throws Exception { + final ConfiguredAirbyteCatalog configuredCatalogWithOneStream = + new ConfiguredAirbyteCatalog().withStreams(List.of(getConfiguredAirbyteStreams().get(0))); + + // Start a sync with one stream + final List messages = runRead(configuredCatalogWithOneStream); + final List recordMessages = filterRecords(messages); + final List stateMessages = filterStateMessages(messages); + final List streamStates = stateMessages.get(0).getGlobal().getStreamStates(); + + assertEquals(3, recordMessages.size()); + assertEquals(2, stateMessages.size()); + assertEquals(1, streamStates.size()); + assertEquals(STREAM_NAME, streamStates.get(0).getStreamDescriptor().getName()); + assertEquals(SCHEMA_NAME, streamStates.get(0).getStreamDescriptor().getNamespace()); + + final AirbyteStateMessage lastStateMessage = Iterables.getLast(stateMessages); + + final ConfiguredAirbyteCatalog configuredCatalogWithTwoStreams = configuredCatalogWithOneStream.withStreams(getConfiguredAirbyteStreams()); + + // Start another sync with a newly added stream + final List messages2 = runRead(configuredCatalogWithTwoStreams, Jsons.jsonNode(List.of(lastStateMessage))); + final List recordMessages2 = filterRecords(messages2); + final List stateMessages2 = filterStateMessages(messages2); + + assertEquals(3, recordMessages2.size()); + assertEquals(2, stateMessages2.size()); + + final AirbyteStateMessage lastStateMessage2 = Iterables.getLast(stateMessages2); + final List streamStates2 = 
lastStateMessage2.getGlobal().getStreamStates(); + + assertEquals(2, streamStates2.size()); + + assertEquals(STREAM_NAME, streamStates2.get(0).getStreamDescriptor().getName()); + assertEquals(SCHEMA_NAME, streamStates2.get(0).getStreamDescriptor().getNamespace()); + assertEquals(STREAM_NAME2, streamStates2.get(1).getStreamDescriptor().getName()); + assertEquals(SCHEMA_NAME, streamStates2.get(1).getStreamDescriptor().getNamespace()); + } + + private List filterStateMessages(final List messages) { + return messages.stream().filter(r -> r.getType() == AirbyteMessage.Type.STATE).map(AirbyteMessage::getState) + .collect(Collectors.toList()); + } + + @Test + protected void testNullValueConversion() throws Exception { + final List configuredAirbyteStreams = + Lists.newArrayList(new ConfiguredAirbyteStream() + .withSyncMode(INCREMENTAL) + .withDestinationSyncMode(DestinationSyncMode.APPEND) + .withStream(CatalogHelpers.createAirbyteStream(STREAM_NAME3, + SCHEMA_NAME, + Field.of("id", JsonSchemaType.NUMBER), + Field.of("name", JsonSchemaType.STRING), + Field.of("userid", JsonSchemaType.NUMBER)) + .withSourceDefinedCursor(true) + .withSourceDefinedPrimaryKey(List.of(List.of("id"))) + .withSupportedSyncModes(Lists.newArrayList(FULL_REFRESH, INCREMENTAL)))); + + final ConfiguredAirbyteCatalog configuredCatalogWithOneStream = + new ConfiguredAirbyteCatalog().withStreams(List.of(configuredAirbyteStreams.get(0))); + + final List airbyteMessages = runRead(configuredCatalogWithOneStream, getState()); + final List recordMessages = filterRecords(airbyteMessages); + final List stateMessages = airbyteMessages + .stream() + .filter(m -> m.getType() == AirbyteMessage.Type.STATE) + .map(AirbyteMessage::getState) + .collect(Collectors.toList()); + Assert.assertEquals(recordMessages.size(), 1); + assertFalse(stateMessages.isEmpty(), "Reason"); + ObjectMapper mapper = new ObjectMapper(); + + assertTrue(cdcFieldsOmitted(recordMessages.get(0).getData()).equals( + mapper.readTree("{\"id\":4, 
\"name\":\"voyager\", \"userid\":null}"))); + + // when we run incremental sync again there should be no new records. Run a sync with the latest + // state message and assert no records were emitted. + JsonNode latestState = extractLatestState(stateMessages); + + testdb.getDatabase().query(c -> c.query("INSERT INTO %s.%s (id, name) VALUES (5,'deep space nine')".formatted(SCHEMA_NAME, STREAM_NAME3))) + .execute(); + + assert Objects.nonNull(latestState); + final List secondSyncRecords = filterRecords(runRead(configuredCatalogWithOneStream, latestState)); + assertFalse( + secondSyncRecords.isEmpty(), + "Expected the second incremental sync to produce records."); + assertEquals(cdcFieldsOmitted(secondSyncRecords.get(0).getData()), + mapper.readTree("{\"id\":5, \"name\":\"deep space nine\", \"userid\":null}")); + } + + private JsonNode cdcFieldsOmitted(final JsonNode node) { + ObjectMapper mapper = new ObjectMapper(); + ObjectNode object = mapper.createObjectNode(); + node.fieldNames().forEachRemaining(name -> { + if (!name.toLowerCase().startsWith("_ab_cdc_")) { + object.put(name, node.get(name)); + } + }); + return object; + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceDatatypeTest.java b/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceDatatypeTest.java new file mode 100644 index 0000000000000..892ef15935728 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceDatatypeTest.java @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mssql; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.db.Database; +import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage; +import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.ContainerModifier; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.TestInstance.Lifecycle; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; + +@TestInstance(Lifecycle.PER_METHOD) +@Execution(ExecutionMode.CONCURRENT) +public class CdcMssqlSourceDatatypeTest extends AbstractMssqlSourceDatatypeTest { + + private final ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); + + @Override + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withCdcReplication() + .withoutSsl() + .build(); + } + + @Override + protected Database setupDatabase() { + testdb = MsSQLTestDatabase.in(BaseImage.MSSQL_2022, ContainerModifier.AGENT) + .withCdc(); + return testdb.getDatabase(); + } + + protected void createTables() throws Exception { + List> createTableTasks = new ArrayList<>(); + List> enableCdcForTableTasks = new ArrayList<>(); + for (var test : testDataHolders) { + createTableTasks.add(() -> testdb.with(test.getCreateSqlQuery())); + enableCdcForTableTasks.add(() -> testdb.withCdcForTable(test.getNameSpace(), test.getNameWithTestPrefix(), null)); + } + executor.invokeAll(createTableTasks); + executor.invokeAll(enableCdcForTableTasks); + } + + protected void populateTables() throws Exception { + List> insertTasks = new ArrayList<>(); + List> waitForCdcRecordsTasks = new ArrayList<>(); + for (var test : testDataHolders) { + insertTasks.add(() -> { + 
this.database.query((ctx) -> { + List sql = test.getInsertSqlQueries(); + Objects.requireNonNull(ctx); + sql.forEach(ctx::fetch); + return null; + }); + return null; + }); + waitForCdcRecordsTasks.add(() -> testdb.waitForCdcRecords(test.getNameSpace(), test.getNameWithTestPrefix(), test.getExpectedValues().size())); + } + // executor.invokeAll(insertTasks); + executor.invokeAll(insertTasks); + executor.invokeAll(waitForCdcRecordsTasks); + } + + @Override + public boolean testCatalog() { + return true; + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/CloudDeploymentSslEnabledMssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/CloudDeploymentSslEnabledMssqlSourceAcceptanceTest.java new file mode 100644 index 0000000000000..534b978f1e62a --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/CloudDeploymentSslEnabledMssqlSourceAcceptanceTest.java @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mssql; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; +import io.airbyte.commons.features.FeatureFlags; +import io.airbyte.commons.features.FeatureFlagsWrapper; +import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage; + +public class CloudDeploymentSslEnabledMssqlSourceAcceptanceTest extends MssqlSourceAcceptanceTest { + + @Override + protected void setupEnvironment(final TestDestinationEnv environment) { + final var container = new MsSQLContainerFactory().shared(BaseImage.MSSQL_2022.reference); + testdb = new MsSQLTestDatabase(container); + testdb = testdb + .withConnectionProperty("encrypt", "true") + .withConnectionProperty("trustServerCertificate", "true") + .withConnectionProperty("databaseName", testdb.getDatabaseName()) + .initialized() + .with("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200), born DATETIMEOFFSET(7));") + .with("CREATE TABLE %s.%s(id INTEGER PRIMARY KEY, name VARCHAR(200));", SCHEMA_NAME, STREAM_NAME2) + .with("INSERT INTO id_and_name (id, name, born) VALUES " + + "(1,'picard', '2124-03-04T01:01:01Z'), " + + "(2, 'crusher', '2124-03-04T01:01:01Z'), " + + "(3, 'vash', '2124-03-04T01:01:01Z');") + .with("INSERT INTO %s.%s (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato'), (4, 'Argo');", SCHEMA_NAME, STREAM_NAME2) + .with("CREATE TABLE %s.%s (id INTEGER PRIMARY KEY, name VARCHAR(200), userid INTEGER DEFAULT NULL);", SCHEMA_NAME, STREAM_NAME3) + .with("INSERT INTO %s.%s (id, name) VALUES (4,'voyager');", SCHEMA_NAME, STREAM_NAME3); + + } + + @Override + protected FeatureFlags featureFlags() { + return FeatureFlagsWrapper.overridingDeploymentMode(super.featureFlags(), "CLOUD"); + } + + @Override + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withEncrytedTrustServerCertificate() + .build(); + } + +} diff --git 
a/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceAcceptanceTest.java new file mode 100644 index 0000000000000..bb48874fdb10b --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceAcceptanceTest.java @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql; + +import static io.airbyte.protocol.models.v0.SyncMode.INCREMENTAL; +import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import io.airbyte.cdk.integrations.base.ssh.SshHelpers; +import io.airbyte.cdk.integrations.standardtest.source.SourceAcceptanceTest; +import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage; +import io.airbyte.protocol.models.Field; +import io.airbyte.protocol.models.JsonSchemaType; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.AirbyteRecordMessage; +import io.airbyte.protocol.models.v0.AirbyteStateMessage; +import io.airbyte.protocol.models.v0.AirbyteStreamState; +import io.airbyte.protocol.models.v0.CatalogHelpers; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.v0.ConnectorSpecification; +import 
io.airbyte.protocol.models.v0.DestinationSyncMode; +import io.airbyte.protocol.models.v0.SyncMode; +import java.sql.SQLException; +import java.util.HashMap; +import java.util.List; +import java.util.Objects; +import java.util.stream.Collectors; +import org.junit.jupiter.api.Test; + +public class MssqlSourceAcceptanceTest extends SourceAcceptanceTest { + + protected static final String SCHEMA_NAME = "dbo"; + protected static final String STREAM_NAME = "id_and_name"; + protected static final String STREAM_NAME2 = "starships"; + protected static final String STREAM_NAME3 = "stream3"; + + protected MsSQLTestDatabase testdb; + + @Override + protected void setupEnvironment(final TestDestinationEnv environment) throws SQLException { + testdb = MsSQLTestDatabase.in(BaseImage.MSSQL_2022) + .with("CREATE TABLE %s.%s (id INTEGER, name VARCHAR(200), born DATETIMEOFFSET(7));", SCHEMA_NAME, STREAM_NAME) + .with("CREATE TABLE %s.%s(id INTEGER PRIMARY KEY, name VARCHAR(200));", SCHEMA_NAME, STREAM_NAME2) + .with("INSERT INTO id_and_name (id, name, born) VALUES " + + "(1, 'picard', '2124-03-04T01:01:01Z'), " + + "(2, 'crusher', '2124-03-04T01:01:01Z'), " + + "(3, 'vash', '2124-03-04T01:01:01Z');") + .with("INSERT INTO %s.%s (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato'), (4, 'Argo');", SCHEMA_NAME, STREAM_NAME2) + .with("CREATE TABLE %s.%s (id INTEGER PRIMARY KEY, name VARCHAR(200), userid INTEGER DEFAULT NULL);", SCHEMA_NAME, STREAM_NAME3) + .with("INSERT INTO %s.%s (id, name) VALUES (4,'voyager');", SCHEMA_NAME, STREAM_NAME3); + + } + + @Override + protected void tearDown(final TestDestinationEnv testEnv) { + testdb.close(); + } + + @Override + protected String getImageName() { + return "airbyte/source-mssql:dev"; + } + + @Override + protected ConnectorSpecification getSpec() throws Exception { + return SshHelpers.getSpecAndInjectSsh(); + } + + @Override + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withoutSsl() + 
.build(); + } + + @Override + protected ConfiguredAirbyteCatalog getConfiguredCatalog() { + return new ConfiguredAirbyteCatalog().withStreams(Lists.newArrayList( + new ConfiguredAirbyteStream() + .withSyncMode(INCREMENTAL) + .withCursorField(Lists.newArrayList("id")) + .withDestinationSyncMode(DestinationSyncMode.APPEND) + .withStream(CatalogHelpers.createAirbyteStream( + STREAM_NAME, SCHEMA_NAME, + Field.of("id", JsonSchemaType.NUMBER), + Field.of("name", JsonSchemaType.STRING)) + .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, INCREMENTAL))), + new ConfiguredAirbyteStream() + .withSyncMode(INCREMENTAL) + .withCursorField(Lists.newArrayList("id")) + .withDestinationSyncMode(DestinationSyncMode.APPEND) + .withStream(CatalogHelpers.createAirbyteStream( + STREAM_NAME2, SCHEMA_NAME, + Field.of("id", JsonSchemaType.NUMBER), + Field.of("name", JsonSchemaType.STRING)) + .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, INCREMENTAL))))); + } + + @Override + protected JsonNode getState() { + return Jsons.jsonNode(new HashMap<>()); + } + + @Test + protected void testAddNewStreamToExistingSync() throws Exception { + final List configuredAirbyteStreams = + Lists.newArrayList(CatalogHelpers.createConfiguredAirbyteStream(STREAM_NAME, + SCHEMA_NAME, + Field.of("id", JsonSchemaType.NUMBER), + Field.of("name", JsonSchemaType.STRING)) + .withDestinationSyncMode(DestinationSyncMode.APPEND) + .withSyncMode(INCREMENTAL) + .withCursorField(List.of("id")), + CatalogHelpers.createConfiguredAirbyteStream(STREAM_NAME2, + SCHEMA_NAME, + Field.of("id", JsonSchemaType.NUMBER), + Field.of("name", JsonSchemaType.STRING)) + .withDestinationSyncMode(DestinationSyncMode.APPEND) + .withSyncMode(INCREMENTAL) + .withCursorField(List.of("id"))); + final ConfiguredAirbyteCatalog configuredCatalogWithOneStream = + new ConfiguredAirbyteCatalog().withStreams(List.of(configuredAirbyteStreams.get(0))); + + // Start a sync with one stream + final List messages = 
runRead(withSourceDefinedCursors(configuredCatalogWithOneStream)); + final List recordMessages = filterRecords(messages); + final List stateMessages = filterStateMessages(messages); + final AirbyteStateMessage lastStateMessage = Iterables.getLast(stateMessages); + final AirbyteStreamState streamState = lastStateMessage.getStream(); + + assertEquals(3, recordMessages.size()); + assertEquals(1, stateMessages.size()); + assertEquals(STREAM_NAME, streamState.getStreamDescriptor().getName()); + assertEquals(SCHEMA_NAME, streamState.getStreamDescriptor().getNamespace()); + + final ConfiguredAirbyteCatalog configuredCatalogWithTwoStreams = + new ConfiguredAirbyteCatalog().withStreams(configuredAirbyteStreams); + + // Start another sync with a newly added stream + final List messages2 = runRead(configuredCatalogWithTwoStreams, Jsons.jsonNode(List.of(lastStateMessage))); + final List recordMessages2 = filterRecords(messages2); + final List stateMessages2 = filterStateMessages(messages2); + + assertEquals(4, recordMessages2.size()); + assertEquals(2, stateMessages2.size()); + + assertEquals(2, stateMessages2.size()); + assertEquals(STREAM_NAME, stateMessages2.get(0).getStream().getStreamDescriptor().getName()); + assertEquals(SCHEMA_NAME, stateMessages2.get(0).getStream().getStreamDescriptor().getNamespace()); + assertEquals(STREAM_NAME2, stateMessages2.get(1).getStream().getStreamDescriptor().getName()); + assertEquals(SCHEMA_NAME, stateMessages2.get(1).getStream().getStreamDescriptor().getNamespace()); + } + + @Test + protected void testNullValueConversion() throws Exception { + final List configuredAirbyteStreams = + Lists.newArrayList(CatalogHelpers.createConfiguredAirbyteStream(STREAM_NAME3, + SCHEMA_NAME, + Field.of("id", JsonSchemaType.NUMBER), + Field.of("name", JsonSchemaType.STRING), + Field.of("userid", JsonSchemaType.NUMBER)) + .withDestinationSyncMode(DestinationSyncMode.APPEND) + .withSyncMode(INCREMENTAL) + .withCursorField(List.of("id"))); + final 
ConfiguredAirbyteCatalog configuredCatalogWithOneStream = + new ConfiguredAirbyteCatalog().withStreams(List.of(configuredAirbyteStreams.get(0))); + + final List airbyteMessages = runRead(configuredCatalogWithOneStream, getState()); + final List recordMessages = filterRecords(airbyteMessages); + final List stateMessages = airbyteMessages + .stream() + .filter(m -> m.getType() == AirbyteMessage.Type.STATE) + .map(AirbyteMessage::getState) + .collect(Collectors.toList()); + assertEquals(recordMessages.size(), 1); + assertFalse(stateMessages.isEmpty(), "Reason"); + ObjectMapper mapper = new ObjectMapper(); + + assertTrue(recordMessages.get(0).getData().equals( + mapper.readTree("{\"id\":4, \"name\":\"voyager\", \"userid\":null}"))); + + // when we run incremental sync again there should be no new records. Run a sync with the latest + // state message and assert no records were emitted. + JsonNode latestState = extractLatestState(stateMessages); + + testdb.getDatabase().query(c -> { + return c.query("INSERT INTO %s.%s (id, name) VALUES (5,'deep space nine');".formatted(SCHEMA_NAME, STREAM_NAME3)); + }).execute(); + + assert Objects.nonNull(latestState); + final List secondSyncRecords = filterRecords(runRead(configuredCatalogWithOneStream, latestState)); + assertFalse( + secondSyncRecords.isEmpty(), + "Expected the second incremental sync to produce records."); + assertTrue(secondSyncRecords.get(0).getData().equals( + mapper.readTree("{\"id\":5, \"name\":\"deep space nine\", \"userid\":null}"))); + + } + + private List filterStateMessages(final List messages) { + return messages.stream().filter(r -> r.getType() == AirbyteMessage.Type.STATE).map(AirbyteMessage::getState) + .collect(Collectors.toList()); + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceDatatypeTest.java 
b/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceDatatypeTest.java new file mode 100644 index 0000000000000..8b11db5c3e77c --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceDatatypeTest.java @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.db.Database; +import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage; + +public class MssqlSourceDatatypeTest extends AbstractMssqlSourceDatatypeTest { + + @Override + protected Database setupDatabase() { + testdb = MsSQLTestDatabase.in(BaseImage.MSSQL_2022); + return testdb.getDatabase(); + } + + @Override + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withoutSsl() + .build(); + } + + @Override + public boolean testCatalog() { + return true; + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceOperationsTest.java b/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceOperationsTest.java new file mode 100644 index 0000000000000..7f1de60c6eacc --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceOperationsTest.java @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mssql; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsInAnyOrder; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage; +import java.sql.Connection; +import java.sql.JDBCType; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class MssqlSourceOperationsTest { + + private final MssqlSourceOperations mssqlSourceOperations = new MssqlSourceOperations(); + + private MsSQLTestDatabase testdb; + + private final String cursorColumn = "cursor_column"; + + @BeforeEach + public void init() { + testdb = MsSQLTestDatabase.in(BaseImage.MSSQL_2022); + } + + @AfterEach + public void tearDown() { + testdb.close(); + } + + @Test + public void setDateTimeOffsetColumnAsCursor() throws SQLException { + final String tableName = "datetimeoffset_table"; + final String createTableQuery = String.format("CREATE TABLE %s(id INTEGER PRIMARY KEY IDENTITY(1,1), %s DATETIMEOFFSET(7));", + tableName, + cursorColumn); + executeQuery(createTableQuery); + final List expectedRecords = new ArrayList<>(); + for (int i = 1; i <= 4; i++) { + final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); + // Manually generate DATETIMEOFFSET data + final String cursorValue = String.format("'2023-0%s-10T10:00:00.100000Z'", i, i * 10); + jsonNode.put("id", i); + // Remove single quotes from string since the date being retrieved will not have quotes + jsonNode.put(cursorColumn, cursorValue.replaceAll("\'", "")); + final String insertQuery = String.format("INSERT INTO %s (%s) 
VALUES (CAST(%s as DATETIMEOFFSET))", tableName, cursorColumn, cursorValue); + + executeQuery(insertQuery); + expectedRecords.add(jsonNode); + } + final String cursorAnchorValue = "2023-01-01T00:00:00.000000+00:00"; + final List actualRecords = new ArrayList<>(); + try (final Connection connection = testdb.getContainer().createConnection("")) { + final PreparedStatement preparedStatement = connection.prepareStatement( + "SELECT * from " + tableName + " WHERE " + cursorColumn + " > ?"); + mssqlSourceOperations.setCursorField(preparedStatement, + 1, + JDBCType.TIMESTAMP_WITH_TIMEZONE, + cursorAnchorValue); + + try (final ResultSet resultSet = preparedStatement.executeQuery()) { + final int columnCount = resultSet.getMetaData().getColumnCount(); + while (resultSet.next()) { + final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); + for (int i = 1; i <= columnCount; i++) { + mssqlSourceOperations.copyToJsonField(resultSet, i, jsonNode); + } + actualRecords.add(jsonNode); + } + } + } + assertThat(actualRecords, containsInAnyOrder(expectedRecords.toArray())); + } + + protected void executeQuery(final String query) throws SQLException { + try (final Connection connection = testdb.getContainer().createConnection("")) { + connection.createStatement().execute(query); + } + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/SshKeyMssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/SshKeyMssqlSourceAcceptanceTest.java new file mode 100644 index 0000000000000..4990c606952a0 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/SshKeyMssqlSourceAcceptanceTest.java @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mssql; + +import io.airbyte.cdk.integrations.base.ssh.SshTunnel.TunnelMethod; + +public class SshKeyMssqlSourceAcceptanceTest extends AbstractSshMssqlSourceAcceptanceTest { + + @Override + public TunnelMethod getTunnelMethod() { + return TunnelMethod.SSH_KEY_AUTH; + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/SshPasswordMssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/SshPasswordMssqlSourceAcceptanceTest.java new file mode 100644 index 0000000000000..35b0b57bf6f80 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/SshPasswordMssqlSourceAcceptanceTest.java @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql; + +import io.airbyte.cdk.integrations.base.ssh.SshTunnel.TunnelMethod; + +public class SshPasswordMssqlSourceAcceptanceTest extends AbstractSshMssqlSourceAcceptanceTest { + + @Override + public TunnelMethod getTunnelMethod() { + return TunnelMethod.SSH_PASSWORD_AUTH; + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/SslEnabledMssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/SslEnabledMssqlSourceAcceptanceTest.java new file mode 100644 index 0000000000000..baa670c759522 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/java/io/airbyte/integrations/source/mssql/SslEnabledMssqlSourceAcceptanceTest.java @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mssql; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; +import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage; + +public class SslEnabledMssqlSourceAcceptanceTest extends MssqlSourceAcceptanceTest { + + @Override + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withEncrytedTrustServerCertificate() + .build(); + } + + @Override + protected void setupEnvironment(final TestDestinationEnv environment) { + final var container = new MsSQLContainerFactory().shared(BaseImage.MSSQL_2022.reference); + testdb = new MsSQLTestDatabase(container); + testdb = testdb + .withConnectionProperty("encrypt", "true") + .withConnectionProperty("trustServerCertificate", "true") + .withConnectionProperty("databaseName", testdb.getDatabaseName()) + .initialized() + .with("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200), born DATETIMEOFFSET(7));") + .with("CREATE TABLE %s.%s(id INTEGER PRIMARY KEY, name VARCHAR(200));", SCHEMA_NAME, STREAM_NAME2) + .with("INSERT INTO id_and_name (id, name, born) VALUES " + + "(1, 'picard', '2124-03-04T01:01:01Z'), " + + "(2, 'crusher', '2124-03-04T01:01:01Z'), " + + "(3, 'vash', '2124-03-04T01:01:01Z');") + .with("INSERT INTO %s.%s (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato'), (4, 'Argo');", SCHEMA_NAME, STREAM_NAME2) + .with("CREATE TABLE %s.%s (id INTEGER PRIMARY KEY, name VARCHAR(200), userid INTEGER DEFAULT NULL);", SCHEMA_NAME, STREAM_NAME3) + .with("INSERT INTO %s.%s (id, name) VALUES (4,'voyager');", SCHEMA_NAME, STREAM_NAME3); + + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/resources/dummy_config.json b/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/resources/dummy_config.json new file mode 100644 index 0000000000000..1f42c042e7467 --- /dev/null +++ 
b/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/resources/dummy_config.json @@ -0,0 +1,7 @@ +{ + "host": "default", + "port": 5555, + "database": "default", + "username": "default", + "password": "default" +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/resources/expected_spec.json b/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/resources/expected_spec.json new file mode 100644 index 0000000000000..e76ec614b9594 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/test-integration/resources/expected_spec.json @@ -0,0 +1,305 @@ +{ + "documentationUrl": "https://docs.airbyte.com/integrations/destinations/mssql", + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "MSSQL Source Spec", + "type": "object", + "required": ["host", "port", "database", "username", "password"], + "properties": { + "host": { + "description": "The hostname of the database.", + "title": "Host", + "type": "string", + "order": 0 + }, + "port": { + "description": "The port of the database.", + "title": "Port", + "type": "integer", + "minimum": 0, + "maximum": 65536, + "examples": ["1433"], + "order": 1 + }, + "database": { + "description": "The name of the database.", + "title": "Database", + "type": "string", + "examples": ["master"], + "order": 2 + }, + "schemas": { + "title": "Schemas", + "description": "The list of schemas to sync from. Defaults to user. 
Case sensitive.", + "type": "array", + "items": { + "type": "string" + }, + "minItems": 0, + "uniqueItems": true, + "default": ["dbo"], + "order": 3 + }, + "username": { + "description": "The username which is used to access the database.", + "title": "Username", + "type": "string", + "order": 4 + }, + "password": { + "description": "The password associated with the username.", + "title": "Password", + "type": "string", + "airbyte_secret": true, + "order": 5 + }, + "jdbc_url_params": { + "title": "JDBC URL Params", + "description": "Additional properties to pass to the JDBC URL string when connecting to the database formatted as 'key=value' pairs separated by the symbol '&'. (example: key1=value1&key2=value2&key3=value3).", + "type": "string", + "order": 6 + }, + "ssl_method": { + "title": "SSL Method", + "type": "object", + "description": "The encryption method which is used when communicating with the database.", + "order": 7, + "oneOf": [ + { + "title": "Unencrypted", + "description": "Data transfer will not be encrypted.", + "required": ["ssl_method"], + "properties": { + "ssl_method": { + "type": "string", + "const": "unencrypted" + } + } + }, + { + "title": "Encrypted (trust server certificate)", + "description": "Use the certificate provided by the server without verification. (For testing purposes only!)", + "required": ["ssl_method"], + "properties": { + "ssl_method": { + "type": "string", + "const": "encrypted_trust_server_certificate" + } + } + }, + { + "title": "Encrypted (verify certificate)", + "description": "Verify and use the certificate provided by the server.", + "required": ["ssl_method"], + "properties": { + "ssl_method": { + "type": "string", + "const": "encrypted_verify_certificate" + }, + "hostNameInCertificate": { + "title": "Host Name In Certificate", + "type": "string", + "description": "Specifies the host name of the server. 
The value of this property must match the subject property of the certificate.", + "order": 0 + }, + "certificate": { + "title": "Certificate", + "type": "string", + "description": "certificate of the server, or of the CA that signed the server certificate", + "order": 1, + "airbyte_secret": true, + "multiline": true + } + } + } + ] + }, + "replication_method": { + "type": "object", + "title": "Update Method", + "description": "Configures how data is extracted from the database.", + "default": "CDC", + "display_type": "radio", + "order": 8, + "oneOf": [ + { + "title": "Read Changes using Change Data Capture (CDC)", + "description": "Recommended - Incrementally reads new inserts, updates, and deletes using the SQL Server's change data capture feature. This must be enabled on your database.", + "required": ["method"], + "properties": { + "method": { + "type": "string", + "const": "CDC", + "order": 0 + }, + "initial_waiting_seconds": { + "type": "integer", + "title": "Initial Waiting Time in Seconds (Advanced)", + "description": "The amount of time the connector will wait when it launches to determine if there is new data to sync or not. Defaults to 300 seconds. Valid range: 120 seconds to 3600 seconds. Read about initial waiting time.", + "default": 300, + "min": 120, + "max": 3600, + "order": 3 + }, + "invalid_cdc_cursor_position_behavior": { + "type": "string", + "title": "Invalid CDC position behavior (Advanced)", + "description": "Determines whether Airbyte should fail or re-sync data in case of an stale/invalid cursor value into the WAL. If 'Fail sync' is chosen, a user will have to manually reset the connection before being able to continue syncing data. 
If 'Re-sync data' is chosen, Airbyte will automatically trigger a refresh but could lead to higher cloud costs and data loss.", + "enum": ["Fail sync", "Re-sync data"], + "default": "Fail sync", + "order": 4 + }, + "queue_size": { + "type": "integer", + "title": "Size of the queue (Advanced)", + "description": "The size of the internal queue. This may interfere with memory consumption and efficiency of the connector, please be careful.", + "default": 10000, + "order": 5, + "min": 1000, + "max": 10000 + }, + "initial_load_timeout_hours": { + "type": "integer", + "title": "Initial Load Timeout in Hours (Advanced)", + "description": "The amount of time an initial load is allowed to continue for before catching up on CDC logs.", + "default": 8, + "min": 4, + "max": 24, + "order": 6 + } + } + }, + { + "title": "Scan Changes with User Defined Cursor", + "description": "Incrementally detects new inserts and updates using the cursor column chosen when configuring a connection (e.g. created_at, updated_at).", + "required": ["method"], + "properties": { + "method": { + "type": "string", + "const": "STANDARD", + "order": 0 + } + } + } + ] + }, + "tunnel_method": { + "type": "object", + "title": "SSH Tunnel Method", + "description": "Whether to initiate an SSH tunnel before connecting to the database, and if so, which kind of authentication to use.", + "oneOf": [ + { + "title": "No Tunnel", + "required": ["tunnel_method"], + "properties": { + "tunnel_method": { + "description": "No ssh tunnel needed to connect to database", + "type": "string", + "const": "NO_TUNNEL", + "order": 0 + } + } + }, + { + "title": "SSH Key Authentication", + "required": [ + "tunnel_method", + "tunnel_host", + "tunnel_port", + "tunnel_user", + "ssh_key" + ], + "properties": { + "tunnel_method": { + "description": "Connect through a jump server tunnel host using username and ssh key", + "type": "string", + "const": "SSH_KEY_AUTH", + "order": 0 + }, + "tunnel_host": { + "title": "SSH Tunnel Jump Server 
Host", + "description": "Hostname of the jump server host that allows inbound ssh tunnel.", + "type": "string", + "order": 1 + }, + "tunnel_port": { + "title": "SSH Connection Port", + "description": "Port on the proxy/jump server that accepts inbound ssh connections.", + "type": "integer", + "minimum": 0, + "maximum": 65536, + "default": 22, + "examples": ["22"], + "order": 2 + }, + "tunnel_user": { + "title": "SSH Login Username", + "description": "OS-level username for logging into the jump server host.", + "type": "string", + "order": 3 + }, + "ssh_key": { + "title": "SSH Private Key", + "description": "OS-level user account ssh key credentials in RSA PEM format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )", + "type": "string", + "airbyte_secret": true, + "multiline": true, + "order": 4 + } + } + }, + { + "title": "Password Authentication", + "required": [ + "tunnel_method", + "tunnel_host", + "tunnel_port", + "tunnel_user", + "tunnel_user_password" + ], + "properties": { + "tunnel_method": { + "description": "Connect through a jump server tunnel host using username and password authentication", + "type": "string", + "const": "SSH_PASSWORD_AUTH", + "order": 0 + }, + "tunnel_host": { + "title": "SSH Tunnel Jump Server Host", + "description": "Hostname of the jump server host that allows inbound ssh tunnel.", + "type": "string", + "order": 1 + }, + "tunnel_port": { + "title": "SSH Connection Port", + "description": "Port on the proxy/jump server that accepts inbound ssh connections.", + "type": "integer", + "minimum": 0, + "maximum": 65536, + "default": 22, + "examples": ["22"], + "order": 2 + }, + "tunnel_user": { + "title": "SSH Login Username", + "description": "OS-level username for logging into the jump server host", + "type": "string", + "order": 3 + }, + "tunnel_user_password": { + "title": "Password", + "description": "OS-level password for logging into the jump server host", + "type": "string", + "airbyte_secret": true, + "order": 4 + } + } + 
} + ] + } + } + }, + "supportsNormalization": false, + "supportsDBT": false, + "supported_destination_sync_modes": [] +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/test-performance/java/io/airbyte/integrations/source/mssql/FillMsSqlTestDbScriptTest.java b/airbyte-integrations/connectors/source-mssql/src.bak/test-performance/java/io/airbyte/integrations/source/mssql/FillMsSqlTestDbScriptTest.java new file mode 100644 index 0000000000000..552b64136b297 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/test-performance/java/io/airbyte/integrations/source/mssql/FillMsSqlTestDbScriptTest.java @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.db.Database; +import io.airbyte.cdk.db.factory.DSLContextFactory; +import io.airbyte.cdk.db.factory.DatabaseDriver; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; +import io.airbyte.cdk.integrations.standardtest.source.performancetest.AbstractSourceFillDbWithTestData; +import io.airbyte.commons.json.Jsons; +import java.util.stream.Stream; +import org.jooq.DSLContext; +import org.junit.jupiter.params.provider.Arguments; + +public class FillMsSqlTestDbScriptTest extends AbstractSourceFillDbWithTestData { + + private JsonNode config; + private DSLContext dslContext; + + @Override + protected JsonNode getConfig() { + return config; + } + + @Override + protected void tearDown(final TestDestinationEnv testEnv) {} + + @Override + protected String getImageName() { + return "airbyte/source-mssql:dev"; + } + + @Override + protected Database setupDatabase(final String dbName) { + final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() + .put("method", "Standard") + .build()); + + config = 
Jsons.jsonNode(ImmutableMap.builder() + .put(JdbcUtils.HOST_KEY, "your_host") + .put(JdbcUtils.PORT_KEY, 1433) + .put(JdbcUtils.DATABASE_KEY, dbName) // set your db name + .put(JdbcUtils.USERNAME_KEY, "your_username") + .put(JdbcUtils.PASSWORD_KEY, "your_pass") + .put("replication_method", replicationMethod) + .build()); + + dslContext = DSLContextFactory.create( + config.get(JdbcUtils.USERNAME_KEY).asText(), + config.get(JdbcUtils.PASSWORD_KEY).asText(), + DatabaseDriver.MSSQLSERVER.getDriverClassName(), + String.format("jdbc:sqlserver://%s:%s;databaseName=%s;", + config.get(JdbcUtils.HOST_KEY).asText(), + config.get(JdbcUtils.PORT_KEY).asInt(), + dbName), + null); + + return new Database(dslContext); + } + + /** + * This is a data provider for fill DB script,, Each argument's group would be ran as a separate + * test. 1st arg - a name of DB that will be used in jdbc connection string. 2nd arg - a schemaName + * that will be ised as a NameSpace in Configured Airbyte Catalog. 3rd arg - a number of expected + * records retrieved in each stream. 4th arg - a number of messages batches + * (numberOfMessages*numberOfBatches, ex. 100*2=200 messages in total in each stream) 5th arg - a + * number of columns in each stream\table that will be use for Airbyte Cataloq configuration 6th arg + * - a number of streams to read in configured airbyte Catalog. Each stream\table in DB should be + * names like "test_0", "test_1",..., test_n. 
+ */ + @Override + protected Stream provideParameters() { + return Stream.of(Arguments.of("your_db_name", "dbo", 100, 2, 240, 1000) // "dbo" is a default schema name in MsSQl DB + ); + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/test-performance/java/io/airbyte/integrations/source/mssql/MssqlSourcePerformanceTest.java b/airbyte-integrations/connectors/source-mssql/src.bak/test-performance/java/io/airbyte/integrations/source/mssql/MssqlSourcePerformanceTest.java new file mode 100644 index 0000000000000..62876374b1661 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/test-performance/java/io/airbyte/integrations/source/mssql/MssqlSourcePerformanceTest.java @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.integrations.standardtest.source.performancetest.AbstractSourcePerformanceTest; +import io.airbyte.commons.io.IOs; +import io.airbyte.commons.json.Jsons; +import java.nio.file.Path; +import java.util.stream.Stream; +import org.junit.jupiter.params.provider.Arguments; + +public class MssqlSourcePerformanceTest extends AbstractSourcePerformanceTest { + + private static final String PERFORMANCE_SECRET_CREDS = "secrets/performance-config.json"; + + @Override + protected String getImageName() { + return "airbyte/source-mssql:dev"; + } + + @Override + protected void setupDatabase(final String dbName) { + final JsonNode plainConfig = Jsons.deserialize(IOs.readFile(Path.of(PERFORMANCE_SECRET_CREDS))); + + setConfig(Jsons.jsonNode(ImmutableMap.builder() + .put(JdbcUtils.HOST_KEY, plainConfig.get(JdbcUtils.HOST_KEY)) + .put(JdbcUtils.PORT_KEY, plainConfig.get(JdbcUtils.PORT_KEY)) + .put(JdbcUtils.DATABASE_KEY, dbName) + .put(JdbcUtils.USERNAME_KEY, 
plainConfig.get(JdbcUtils.USERNAME_KEY)) + .put(JdbcUtils.PASSWORD_KEY, plainConfig.get(JdbcUtils.PASSWORD_KEY)) + .build())); + } + + /** + * This is a data provider for performance tests, Each argument's group would be ran as a separate + * test. 1st arg - a name of DB that will be used in jdbc connection string. 2nd arg - a schemaName + * that will be used as a NameSpace in Configured Airbyte Catalog. 3rd arg - a number of expected + * records retrieved in each stream. 4th arg - a number of columns in each stream\table that will be + * use for Airbyte Cataloq configuration 5th arg - a number of streams to read in configured airbyte + * Catalog. Each stream\table in DB should be names like "test_0", "test_1",..., test_n. + */ + @Override + protected Stream provideParameters() { + return Stream.of( + Arguments.of("t1000_c240_r200", "dbo", 200, 240, 1000), + Arguments.of("t25_c8_r50k_s10kb", "dbo", 50000, 8, 25), + Arguments.of("t1000_c8_r10k_s500b", "dbo", 10000, 8, 1000)); + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/test-performance/sql/create_mssql_benchmarks.sql b/airbyte-integrations/connectors/source-mssql/src.bak/test-performance/sql/create_mssql_benchmarks.sql new file mode 100644 index 0000000000000..e1e0870f27ed4 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/test-performance/sql/create_mssql_benchmarks.sql @@ -0,0 +1,305 @@ +CREATE + PROCEDURE table_copy( + @tablecount INT + ) AS BEGIN +SET + nocount ON; + + DECLARE @v_max_table INT; + +DECLARE @v_counter_table INT; + +DECLARE @tnamee VARCHAR(255); +SET +@v_max_table = @tablecount; +SET +@v_counter_table = 1; + +while @v_counter_table < @v_max_table BEGIN +SET +@tnamee = concat( + 'SELECT * INTO test_', + @v_counter_table, + ' FROM test;' +); + +EXEC(@tnamee); +SET +@v_counter_table = @v_counter_table + 1; +END; +END; + +GO -- +CREATE + PROCEDURE insert_rows( + @allrows INT, + @insertcount INT, + @value NVARCHAR(MAX) + ) AS BEGIN +SET + nocount ON; 
+ + DECLARE @dummyIpsum VARCHAR(255) DECLARE @fieldText NVARCHAR(MAX) +SET + @fieldText = @value DECLARE @vmax INT; + +DECLARE @vmaxx INT; + +DECLARE @vmaxoneinsert INT; + +DECLARE @counter INT; + +DECLARE @lastinsertcounter INT; + +DECLARE @lastinsert INT; + +DECLARE @fullloop INT; + +DECLARE @fullloopcounter INT; +SET +@vmax = @allrows; +SET +@vmaxx = @allrows; +SET +@vmaxoneinsert = @insertcount; +SET +@counter = 1; +SET +@lastinsertcounter = 1; +SET +@lastinsert = 0; +SET +@fullloop = 0; +SET +@fullloopcounter = 0; +SET +@dummyIpsum = '''dummy_ipsum''' while @vmaxx <= @vmaxoneinsert BEGIN +SET +@vmaxoneinsert = @vmaxx; +SET +@fullloop = @fullloop + 1; +SET +@vmaxx = @vmaxx + 1; +END; + +while @vmax > @vmaxoneinsert BEGIN +SET +@fullloop = @fullloop + 1; +SET +@vmax = @vmax - @vmaxoneinsert; +SET +@lastinsert = @vmax; +END; + +DECLARE @insertTable NVARCHAR(MAX) +SET +@insertTable = CONVERT( + NVARCHAR(MAX), + 'insert into test (varchar1, varchar2, varchar3, varchar4, varchar5, longtextfield, timestampfield) values (' +); + +while @counter < @vmaxoneinsert BEGIN +SET +@insertTable = CONVERT( + NVARCHAR(MAX), + concat( + @insertTable, + @dummyIpsum, + ', ', + @dummyIpsum, + ', ', + @dummyIpsum, + ', ', + @dummyIpsum, + ', ', + @dummyIpsum, + ', ', + @fieldText, + ', CURRENT_TIMESTAMP), (' + ) +); +SET +@counter = @counter + 1; +END; +SET +@insertTable = CONVERT( + NVARCHAR(MAX), + concat( + @insertTable, + @dummyIpsum, + ', ', + @dummyIpsum, + ', ', + @dummyIpsum, + ', ', + @dummyIpsum, + ', ', + @dummyIpsum, + ', ', + @fieldText, + ', CURRENT_TIMESTAMP);' + ) +); + +while @vmax < 1 BEGIN +SET +@fullloop = 0 +SET +@vmax = 1 +END; + +while @fullloopcounter < @fullloop BEGIN EXEC(@insertTable); +SET +@fullloopcounter = @fullloopcounter + 1; +END; + +DECLARE @insertTableLasted NVARCHAR(MAX); +SET +@insertTableLasted = CONVERT( + NVARCHAR(MAX), + 'insert into test (varchar1, varchar2, varchar3, varchar4, varchar5, longtextfield, timestampfield) values (' +); + +while 
@lastinsertcounter < @lastinsert BEGIN +SET +@insertTableLasted = CONVERT( + NVARCHAR(MAX), + concat( + @insertTableLasted, + @dummyIpsum, + ', ', + @dummyIpsum, + ', ', + @dummyIpsum, + ', ', + @dummyIpsum, + ', ', + @dummyIpsum, + ', ', + @fieldText, + ', CURRENT_TIMESTAMP), (' + ) +); +SET +@lastinsertcounter = @lastinsertcounter + 1; +END; +SET +@insertTableLasted = CONVERT( + NVARCHAR(MAX), + concat( + @insertTableLasted, + @dummyIpsum, + ', ', + @dummyIpsum, + ', ', + @dummyIpsum, + ', ', + @dummyIpsum, + ', ', + @dummyIpsum, + ', ', + @fieldText, + ', CURRENT_TIMESTAMP);' + ) +); + +while @lastinsert > 0 BEGIN EXEC(@insertTableLasted); +SET +@lastinsert = 0; +END; +END; + +GO -- +CREATE + PROCEDURE table_create( + @val INT + ) AS BEGIN +SET + nocount ON; + + -- SQLINES LICENSE FOR EVALUATION USE ONLY +CREATE + TABLE + test( + id INT CHECK( + id > 0 + ) NOT NULL IDENTITY PRIMARY KEY, + varchar1 VARCHAR(255), + varchar2 VARCHAR(255), + varchar3 VARCHAR(255), + varchar4 VARCHAR(255), + varchar5 VARCHAR(255), + longtextfield nvarchar(MAX), + timestampfield datetime2(0) + ); + +DECLARE @extraSmallText NVARCHAR(MAX); + +DECLARE @smallText NVARCHAR(MAX); + +DECLARE @regularText NVARCHAR(MAX); + +DECLARE @largeText NVARCHAR(MAX); + +DECLARE @someText nvarchar(MAX); + +SELECT + @someText = N'some text, some text, '; +SET +@extraSmallText = N'''test weight 50b - some text, some text, some text'''; +SET +@smallText = N'''test weight 500b - '; +SET +@regularText = N'''test weight 10kb - '; +SET +@largeText = N'''test weight 100kb - '; + +SELECT + @smallText = @smallText + REPLICATE( + @someText, + 20 + )+ N''''; + +SELECT + @regularText = @regularText + REPLICATE( + @someText, + 590 + )+ N'some text'''; + +SELECT + @largeText = @largeText + REPLICATE( + @someText, + 4450 + )+ N'some text'''; + +) -- TODO: change the following @allrows to control the number of records with different sizes +-- number of 50B records +EXEC insert_rows @allrows = 0, +@insertcount = 998, 
+@value = @extraSmallText -- number of 500B records +EXEC insert_rows @allrows = 0, +@insertcount = 998, +@value = @smallText -- number of 10Kb records +EXEC insert_rows @allrows = 0, +@insertcount = 998, +@value = @regularText -- number of 100Kb records +EXEC insert_rows @allrows = 0, +@insertcount = 98, +@value = @largeText +END; + +GO -- +EXEC table_create @val = 0 DROP + PROCEDURE IF EXISTS insert_rows; + +DROP + PROCEDURE IF EXISTS table_create; + +-- TODO: change the value to control the number of tables +EXEC table_copy @tablecount = 1; + +DROP + PROCEDURE IF EXISTS table_copy; + +EXEC sp_rename 'test', +'test_0'; \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java b/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java new file mode 100644 index 0000000000000..bd0b289f123eb --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java @@ -0,0 +1,686 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mssql; + +import static io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants.SYNC_CHECKPOINT_RECORDS_PROPERTY; +import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter.CDC_DELETED_AT; +import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter.CDC_UPDATED_AT; +import static io.airbyte.integrations.source.mssql.MssqlSource.CDC_DEFAULT_CURSOR; +import static io.airbyte.integrations.source.mssql.MssqlSource.CDC_EVENT_SERIAL_NO; +import static io.airbyte.integrations.source.mssql.MssqlSource.CDC_LSN; +import static io.airbyte.integrations.source.mssql.MssqlSource.MSSQL_CDC_OFFSET; +import static io.airbyte.integrations.source.mssql.MssqlSource.MSSQL_DB_HISTORY; +import static io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadStateManager.ORDERED_COL_STATE_TYPE; +import static io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadStateManager.STATE_TYPE_KEY; +import static org.awaitility.Awaitility.await; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import com.google.common.collect.Streams; +import io.airbyte.cdk.db.factory.DataSourceFactory; +import io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase; +import io.airbyte.cdk.db.jdbc.JdbcDatabase; +import 
io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.db.jdbc.StreamingJdbcDatabase; +import io.airbyte.cdk.db.jdbc.streaming.AdaptiveStreamingQueryConfig; +import io.airbyte.cdk.integrations.JdbcConnector; +import io.airbyte.cdk.integrations.debezium.CdcSourceTest; +import io.airbyte.cdk.integrations.debezium.CdcTargetPosition; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.util.AutoCloseableIterator; +import io.airbyte.commons.util.AutoCloseableIterators; +import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage; +import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.ContainerModifier; +import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil; +import io.airbyte.protocol.models.Field; +import io.airbyte.protocol.models.JsonSchemaType; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; +import io.airbyte.protocol.models.v0.AirbyteGlobalState; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.AirbyteRecordMessage; +import io.airbyte.protocol.models.v0.AirbyteStateMessage; +import io.airbyte.protocol.models.v0.AirbyteStateMessage.AirbyteStateType; +import io.airbyte.protocol.models.v0.AirbyteStream; +import io.airbyte.protocol.models.v0.AirbyteStreamState; +import io.airbyte.protocol.models.v0.CatalogHelpers; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.v0.StreamDescriptor; +import io.airbyte.protocol.models.v0.SyncMode; +import io.debezium.connector.sqlserver.Lsn; +import java.sql.SQLException; +import java.time.Duration; +import java.util.*; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; +import javax.sql.DataSource; +import org.junit.jupiter.api.*; +import 
org.junit.jupiter.api.TestInstance.Lifecycle; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@TestInstance(Lifecycle.PER_METHOD) +@Execution(ExecutionMode.CONCURRENT) +@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "NP_NULL_ON_SOME_PATH") +public class CdcMssqlSourceTest extends CdcSourceTest { + + private static final Logger LOGGER = LoggerFactory.getLogger(CdcSourceTest.class); + + static private final String CDC_ROLE_NAME = "cdc_selector"; + + static private final String TEST_USER_NAME_PREFIX = "cdc_test_user"; + + private DataSource testDataSource; + + protected final String testUserName() { + return testdb.withNamespace(TEST_USER_NAME_PREFIX); + } + + @Override + protected MsSQLTestDatabase createTestDatabase() { + return MsSQLTestDatabase.in(BaseImage.MSSQL_2022, ContainerModifier.AGENT) + .withWaitUntilAgentRunning() + .withCdc(); + } + + @Override + protected MssqlSource source() { + return new MssqlSource(); + } + + @Override + protected JsonNode config() { + return testdb.configBuilder() + .withHostAndPort() + .withDatabase() + .with(JdbcUtils.USERNAME_KEY, testUserName()) + .with(JdbcUtils.PASSWORD_KEY, testdb.getPassword()) + .withSchemas(modelsSchema(), randomSchema()) + .withCdcReplication() + .withoutSsl() + .with(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1) + .build(); + } + + @Override + protected void assertExpectedStateMessageCountMatches(final List stateMessages, long totalCount) { + AtomicLong count = new AtomicLong(0L); + stateMessages.stream().forEach( + stateMessage -> count.addAndGet(stateMessage.getSourceStats() != null ? 
stateMessage.getSourceStats().getRecordCount().longValue() : 0L)); + assertEquals(totalCount, count.get()); + } + + @Override + @BeforeEach + protected void setup() { + testdb = createTestDatabase(); + createTables(); + // Enables cdc on MODELS_SCHEMA.MODELS_STREAM_NAME, giving CDC_ROLE_NAME select access. + testdb + .withCdcForTable(modelsSchema(), MODELS_STREAM_NAME, CDC_ROLE_NAME) + .withCdcForTable(randomSchema(), RANDOM_TABLE_NAME, CDC_ROLE_NAME); + + // Create a test user to be used by the source, with proper permissions. + testdb + .with("CREATE LOGIN %s WITH PASSWORD = '%s', DEFAULT_DATABASE = %s", testUserName(), testdb.getPassword(), testdb.getDatabaseName()) + .with("CREATE USER %s FOR LOGIN %s WITH DEFAULT_SCHEMA = [dbo]", testUserName(), testUserName()) + .with("REVOKE ALL FROM %s CASCADE;", testUserName()) + .with("EXEC sp_msforeachtable \"REVOKE ALL ON '?' TO %s;\"", testUserName()) + .with("GRANT SELECT ON SCHEMA :: [%s] TO %s", modelsSchema(), testUserName()) + .with("GRANT SELECT ON SCHEMA :: [%s] TO %s", randomSchema(), testUserName()) + .with("GRANT SELECT ON SCHEMA :: [cdc] TO %s", testUserName()) + .with("USE [master]") + .with("GRANT VIEW SERVER STATE TO %s", testUserName()) + .with("USE [%s]", testdb.getDatabaseName()) + .with("EXEC sp_addrolemember N'%s', N'%s';", CDC_ROLE_NAME, testUserName()); + + populateTables(); + waitForCdcRecords(); + testDataSource = createTestDataSource(); + } + + public void waitForCdcRecords() { + testdb.waitForCdcRecords(modelsSchema(), MODELS_STREAM_NAME, MODEL_RECORDS.size()); + testdb.waitForCdcRecords(randomSchema(), RANDOM_TABLE_NAME, MODEL_RECORDS_RANDOM.size()); + + } + + protected DataSource createTestDataSource() { + return DataSourceFactory.create( + testUserName(), + testdb.getPassword(), + testdb.getDatabaseDriver().getDriverClassName(), + testdb.getJdbcUrl(), + Map.of("encrypt", "false", "trustServerCertificate", "true"), + JdbcConnector.CONNECT_TIMEOUT_DEFAULT); + } + + @Override + @AfterEach + 
protected void tearDown() { + try { + DataSourceFactory.close(testDataSource); + } catch (final Exception e) { + throw new RuntimeException(e); + } + super.tearDown(); + } + + private JdbcDatabase testDatabase() { + return new DefaultJdbcDatabase(testDataSource); + } + + // TODO : Delete this Override when MSSQL supports individual table snapshot + @Override + public void newTableSnapshotTest() { + // Do nothing + } + + @Override + protected void addIsResumableFlagForNonPkTable(final AirbyteStream stream) { + stream.setIsResumable(false); + } + + // Utilize the setup to do test on MssqlDebeziumStateUtil. + @Test + public void testCdcSnapshot() { + + JdbcDatabase testDatabase = testDatabase(); + testDatabase.setSourceConfig(config()); + testDatabase.setDatabaseConfig(source().toDatabaseConfig(config())); + + JsonNode debeziumState = + MssqlDebeziumStateUtil.constructInitialDebeziumState(MssqlCdcHelper.getDebeziumProperties(testDatabase, getConfiguredCatalog(), true), + getConfiguredCatalog(), testDatabase); + + Assertions.assertEquals(3, Jsons.object(debeziumState, Map.class).size()); + Assertions.assertTrue(debeziumState.has("is_compressed")); + Assertions.assertFalse(debeziumState.get("is_compressed").asBoolean()); + Assertions.assertTrue(debeziumState.has("mssql_db_history")); + Assertions.assertNotNull(debeziumState.get("mssql_db_history")); + Assertions.assertTrue(debeziumState.has("mssql_cdc_offset")); + } + + // Tests even with consistent inserting operations, CDC snapshot and incremental load will not lose + // data. + @Test + @Timeout(value = 5, + unit = TimeUnit.MINUTES) + public void testCdcNotLoseDataWithConsistentWriting() throws Exception { + ExecutorService executor = Executors.newFixedThreadPool(10); + + // Inserting 50 records in 10 seconds. + // Intention is to insert records while we are running the first snapshot read. 
And we check with + // the first snapshot read operations + // and a following incremental read operation, we will be able to capture all data. + int numberOfRecordsToInsert = 50; + var insertingProcess = executor.submit(() -> { + for (int i = 0; i < numberOfRecordsToInsert; i++) { + testdb.with("INSERT INTO %s.%s (%s, %s, %s) VALUES (%s, %s, '%s');", + modelsSchema(), MODELS_STREAM_NAME, COL_ID, COL_MAKE_ID, COL_MODEL, 910019 + i, i, "car description"); + try { + Thread.sleep(200); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + }); + + final AutoCloseableIterator read1 = source() + .read(config(), getConfiguredCatalog(), null); + final List actualRecords1 = AutoCloseableIterators.toListAndClose(read1); + final Set recordMessages = extractRecordMessages(actualRecords1); + final List stateMessagesFromFirstSync = extractStateMessages(actualRecords1); + final JsonNode state = Jsons.jsonNode(Collections.singletonList(stateMessagesFromFirstSync.get(stateMessagesFromFirstSync.size() - 1))); + // Make sure we have finished inserting process and read from previous state. + insertingProcess.get(); + + final AutoCloseableIterator read2 = source() + .read(config(), getConfiguredCatalog(), state); + final List actualRecords2 = AutoCloseableIterators.toListAndClose(read2); + + recordMessages.addAll(extractRecordMessages(actualRecords2)); + + final Set ids = recordMessages.stream().map(message -> message.getData().get("id").intValue()).collect(Collectors.toSet()); + // Originally in setup we have inserted 6 records in the table. 
+ assertEquals(ids.size(), numberOfRecordsToInsert + 6); + } + + @Override + protected String columnClause(final Map columnsWithDataType, final Optional primaryKey) { + final StringBuilder columnClause = new StringBuilder(); + int i = 0; + for (final Map.Entry column : columnsWithDataType.entrySet()) { + columnClause.append(column.getKey()); + columnClause.append(" "); + columnClause.append(column.getValue()); + if (primaryKey.isPresent() && primaryKey.get().equals(column.getKey())) { + columnClause.append(" PRIMARY KEY"); + } + if (i < (columnsWithDataType.size() - 1)) { + columnClause.append(","); + columnClause.append(" "); + } + i++; + } + return columnClause.toString(); + } + + @Test + void testAssertCdcEnabledInDb() { + // since we enable cdc in setup, assert that we successfully pass this first + assertDoesNotThrow(() -> source().assertCdcEnabledInDb(config(), testDatabase())); + // then disable cdc and assert the check fails + testdb.withoutCdc(); + assertThrows(RuntimeException.class, () -> source().assertCdcEnabledInDb(config(), testDatabase())); + } + + @Test + void testAssertCdcSchemaQueryable() { + // correct access granted by setup so assert check passes + assertDoesNotThrow(() -> source().assertCdcSchemaQueryable(config(), testDatabase())); + // now revoke perms and assert that check fails + testdb.with("REVOKE SELECT ON SCHEMA :: [cdc] TO %s", testUserName()); + assertThrows(com.microsoft.sqlserver.jdbc.SQLServerException.class, + () -> source().assertCdcSchemaQueryable(config(), testDatabase())); + } + + @Test + void testCdcCheckOperationsWithDot() throws Exception { + final String dbNameWithDot = testdb.getDatabaseName().replace("_", "."); + testdb.with("CREATE DATABASE [%s];", dbNameWithDot) + .with("USE [%s]", dbNameWithDot) + .with("EXEC sys.sp_cdc_enable_db;"); + final AirbyteConnectionStatus status = source().check(config()); + assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.SUCCEEDED); + } + + // todo: check LSN returned is 
actually the max LSN + // todo: check we fail as expected under certain conditions + @Test + void testGetTargetPosition() throws Exception { + // check that getTargetPosition returns higher Lsn after inserting new row + testdb.withWaitUntilMaxLsnAvailable(); + final Lsn firstLsn = MssqlCdcTargetPosition.getTargetPosition(testDatabase(), testdb.getDatabaseName()).targetLsn; + testdb.with("INSERT INTO %s.%s (%s, %s, %s) VALUES (%s, %s, '%s');", + modelsSchema(), MODELS_STREAM_NAME, COL_ID, COL_MAKE_ID, COL_MODEL, 910019, 1, "another car"); + // Wait for Agent capture job to log CDC change. + await().atMost(Duration.ofSeconds(45)).until(() -> { + final Lsn secondLsn = MssqlCdcTargetPosition.getTargetPosition(testDatabase(), testdb.getDatabaseName()).targetLsn; + return secondLsn.compareTo(firstLsn) > 0; + }); + } + + // Remove all timestamp related fields in shared state. We want to make sure other information will + // not change. + private void pruneSharedStateTimestamp(final JsonNode rootNode) throws Exception { + ObjectMapper mapper = new ObjectMapper(); + + // Navigate to the specific node + JsonNode historyNode = rootNode.path("state").path("mssql_db_history"); + if (historyNode.isMissingNode()) { + return; // Node not found, nothing to do + } + String historyJson = historyNode.asText(); + JsonNode historyJsonNode = mapper.readTree(historyJson); + + ObjectNode objectNode = (ObjectNode) historyJsonNode; + objectNode.remove("ts_ms"); + + if (objectNode.has("position") && objectNode.get("position").has("ts_sec")) { + ((ObjectNode) objectNode.get("position")).remove("ts_sec"); + } + + JsonNode offsetNode = rootNode.path("state").path("mssql_cdc_offset"); + JsonNode offsetJsonNode = mapper.readTree(offsetNode.asText()); + if (offsetJsonNode.has("ts_sec")) { + ((ObjectNode) offsetJsonNode).remove("ts_sec"); + } + + // Replace the original string with the modified one + ((ObjectNode) rootNode.path("state")).put("mssql_db_history", 
mapper.writeValueAsString(historyJsonNode)); + ((ObjectNode) rootNode.path("state")).put("mssql_cdc_offset", mapper.writeValueAsString(offsetJsonNode)); + } + + @Test + public void testTwoStreamSync() throws Exception { + // Add another stream models_2 and read that one as well. + final ConfiguredAirbyteCatalog configuredCatalog = Jsons.clone(getConfiguredCatalog()); + + final List MODEL_RECORDS_2 = ImmutableList.of( + Jsons.jsonNode(ImmutableMap.of(COL_ID, 110, COL_MAKE_ID, 1, COL_MODEL, "Fiesta-2")), + Jsons.jsonNode(ImmutableMap.of(COL_ID, 120, COL_MAKE_ID, 1, COL_MODEL, "Focus-2")), + Jsons.jsonNode(ImmutableMap.of(COL_ID, 130, COL_MAKE_ID, 1, COL_MODEL, "Ranger-2")), + Jsons.jsonNode(ImmutableMap.of(COL_ID, 140, COL_MAKE_ID, 2, COL_MODEL, "GLA-2")), + Jsons.jsonNode(ImmutableMap.of(COL_ID, 150, COL_MAKE_ID, 2, COL_MODEL, "A 220-2")), + Jsons.jsonNode(ImmutableMap.of(COL_ID, 160, COL_MAKE_ID, 2, COL_MODEL, "E 350-2"))); + + testdb.with(createTableSqlFmt(), modelsSchema(), MODELS_STREAM_NAME + "_2", + columnClause(ImmutableMap.of(COL_ID, "INTEGER", COL_MAKE_ID, "INTEGER", COL_MODEL, "VARCHAR(200)"), Optional.of(COL_ID))); + + for (final JsonNode recordJson : MODEL_RECORDS_2) { + writeRecords(recordJson, modelsSchema(), MODELS_STREAM_NAME + "_2", COL_ID, + COL_MAKE_ID, COL_MODEL); + } + + final ConfiguredAirbyteStream airbyteStream = new ConfiguredAirbyteStream() + .withStream(CatalogHelpers.createAirbyteStream( + MODELS_STREAM_NAME + "_2", + modelsSchema(), + Field.of(COL_ID, JsonSchemaType.INTEGER), + Field.of(COL_MAKE_ID, JsonSchemaType.INTEGER), + Field.of(COL_MODEL, JsonSchemaType.STRING)) + .withSupportedSyncModes( + Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + .withSourceDefinedPrimaryKey(List.of(List.of(COL_ID)))); + airbyteStream.setSyncMode(SyncMode.INCREMENTAL); + + final List streams = configuredCatalog.getStreams(); + streams.add(airbyteStream); + configuredCatalog.withStreams(streams); + + final AutoCloseableIterator read1 = 
source() + .read(config(), configuredCatalog, null); + final List actualRecords1 = AutoCloseableIterators.toListAndClose(read1); + + final Set recordMessages1 = extractRecordMessages(actualRecords1); + final List stateMessages1 = extractStateMessages(actualRecords1); + assertEquals(13, stateMessages1.size()); + assertExpectedStateMessagesWithTotalCount(stateMessages1, 12); + + JsonNode sharedState = null; + StreamDescriptor firstStreamInState = null; + for (int i = 0; i < stateMessages1.size(); i++) { + final AirbyteStateMessage stateMessage = stateMessages1.get(i); + assertEquals(AirbyteStateType.GLOBAL, stateMessage.getType()); + final AirbyteGlobalState global = stateMessage.getGlobal(); + assertNotNull(global.getSharedState()); + if (Objects.isNull(sharedState)) { + ObjectMapper mapper = new ObjectMapper(); + sharedState = mapper.valueToTree(global.getSharedState()); + pruneSharedStateTimestamp(sharedState); + } else { + ObjectMapper mapper = new ObjectMapper(); + var newSharedState = mapper.valueToTree(global.getSharedState()); + pruneSharedStateTimestamp(newSharedState); + assertEquals(sharedState, newSharedState); + } + + if (Objects.isNull(firstStreamInState)) { + assertEquals(1, global.getStreamStates().size()); + firstStreamInState = global.getStreamStates().get(0).getStreamDescriptor(); + } + + if (i <= 4) { + // First 4 state messages are pk state + assertEquals(1, global.getStreamStates().size()); + final AirbyteStreamState streamState = global.getStreamStates().get(0); + assertTrue(streamState.getStreamState().has(STATE_TYPE_KEY)); + assertEquals(ORDERED_COL_STATE_TYPE, streamState.getStreamState().get(STATE_TYPE_KEY).asText()); + } else if (i == 5) { + // 5th state message is the final state message emitted for the stream + assertEquals(1, global.getStreamStates().size()); + final AirbyteStreamState streamState = global.getStreamStates().get(0); + assertFalse(streamState.getStreamState().has(STATE_TYPE_KEY)); + } else if (i <= 10) { + // 6th to 10th 
is the primary_key state message for the 2nd stream but final state message for 1st + // stream + assertEquals(2, global.getStreamStates().size()); + final StreamDescriptor finalFirstStreamInState = firstStreamInState; + global.getStreamStates().forEach(c -> { + if (c.getStreamDescriptor().equals(finalFirstStreamInState)) { + assertFalse(c.getStreamState().has(STATE_TYPE_KEY)); + } else { + assertTrue(c.getStreamState().has(STATE_TYPE_KEY)); + assertEquals(ORDERED_COL_STATE_TYPE, c.getStreamState().get(STATE_TYPE_KEY).asText()); + } + }); + } else { + // last 2 state messages don't contain primary_key info cause primary_key sync should be complete + assertEquals(2, global.getStreamStates().size()); + global.getStreamStates().forEach(c -> assertFalse(c.getStreamState().has(STATE_TYPE_KEY))); + } + } + + final Set names = new HashSet<>(STREAM_NAMES); + names.add(MODELS_STREAM_NAME + "_2"); + assertExpectedRecords(Streams.concat(MODEL_RECORDS_2.stream(), MODEL_RECORDS.stream()) + .collect(Collectors.toSet()), + recordMessages1, + names, + names, + modelsSchema()); + + assertEquals(new StreamDescriptor().withName(MODELS_STREAM_NAME).withNamespace(modelsSchema()), firstStreamInState); + + // Triggering a sync with a primary_key state for 1 stream and complete state for other stream + final AutoCloseableIterator read2 = source() + .read(config(), configuredCatalog, Jsons.jsonNode(Collections.singletonList(stateMessages1.get(6)))); + final List actualRecords2 = AutoCloseableIterators.toListAndClose(read2); + + final List stateMessages2 = extractStateMessages(actualRecords2); + + assertEquals(6, stateMessages2.size()); + // State was reset to the 7th; thus 5 remaining records were expected to be reloaded. 
+ assertExpectedStateMessagesWithTotalCount(stateMessages2, 5); + for (int i = 0; i < stateMessages2.size(); i++) { + final AirbyteStateMessage stateMessage = stateMessages2.get(i); + assertEquals(AirbyteStateType.GLOBAL, stateMessage.getType()); + final AirbyteGlobalState global = stateMessage.getGlobal(); + assertNotNull(global.getSharedState()); + assertEquals(2, global.getStreamStates().size()); + + if (i <= 4) { + final StreamDescriptor finalFirstStreamInState = firstStreamInState; + global.getStreamStates().forEach(c -> { + // First 5 state messages are primary_key state for the stream that didn't complete primary_key sync + // the first time + if (c.getStreamDescriptor().equals(finalFirstStreamInState)) { + assertFalse(c.getStreamState().has(STATE_TYPE_KEY)); + } else { + assertTrue(c.getStreamState().has(STATE_TYPE_KEY)); + assertEquals(ORDERED_COL_STATE_TYPE, c.getStreamState().get(STATE_TYPE_KEY).asText()); + } + }); + } else { + // last state messages doesn't contain primary_key info cause primary_key sync should be complete + global.getStreamStates().forEach(c -> assertFalse(c.getStreamState().has(STATE_TYPE_KEY))); + } + } + + final Set recordMessages2 = extractRecordMessages(actualRecords2); + assertEquals(5, recordMessages2.size()); + assertExpectedRecords(new HashSet<>(MODEL_RECORDS_2.subList(1, MODEL_RECORDS_2.size())), + recordMessages2, + names, + names, + modelsSchema()); + } + + protected void assertExpectedStateMessagesWithTotalCount(final List stateMessages, final long totalRecordCount) { + long actualRecordCount = 0L; + for (final AirbyteStateMessage message : stateMessages) { + actualRecordCount += message.getSourceStats().getRecordCount(); + } + assertEquals(actualRecordCount, totalRecordCount); + } + + @Override + protected void removeCDCColumns(final ObjectNode data) { + data.remove(CDC_LSN); + data.remove(CDC_UPDATED_AT); + data.remove(CDC_DELETED_AT); + data.remove(CDC_EVENT_SERIAL_NO); + data.remove(CDC_DEFAULT_CURSOR); + } + + 
@Override + protected MssqlCdcTargetPosition cdcLatestTargetPosition() { + testdb.withWaitUntilMaxLsnAvailable(); + final JdbcDatabase jdbcDatabase = new StreamingJdbcDatabase( + testDataSource, + new MssqlSourceOperations(), + AdaptiveStreamingQueryConfig::new); + return MssqlCdcTargetPosition.getTargetPosition(jdbcDatabase, testdb.getDatabaseName()); + } + + @Override + protected MssqlCdcTargetPosition extractPosition(final JsonNode record) { + return new MssqlCdcTargetPosition(Lsn.valueOf(record.get(CDC_LSN).asText())); + } + + @Override + protected void assertNullCdcMetaData(final JsonNode data) { + assertNull(data.get(CDC_LSN)); + assertNull(data.get(CDC_UPDATED_AT)); + assertNull(data.get(CDC_DELETED_AT)); + assertNull(data.get(CDC_EVENT_SERIAL_NO)); + assertNull(data.get(CDC_DEFAULT_CURSOR)); + } + + @Override + protected void assertCdcMetaData(final JsonNode data, final boolean deletedAtNull) { + assertNotNull(data.get(CDC_LSN)); + assertNotNull(data.get(CDC_EVENT_SERIAL_NO)); + assertNotNull(data.get(CDC_UPDATED_AT)); + assertNotNull(data.get(CDC_DEFAULT_CURSOR)); + if (deletedAtNull) { + assertTrue(data.get(CDC_DELETED_AT).isNull()); + } else { + assertFalse(data.get(CDC_DELETED_AT).isNull()); + } + } + + @Override + protected void addCdcMetadataColumns(final AirbyteStream stream) { + final ObjectNode jsonSchema = (ObjectNode) stream.getJsonSchema(); + final ObjectNode properties = (ObjectNode) jsonSchema.get("properties"); + + final JsonNode airbyteIntegerType = Jsons.jsonNode(ImmutableMap.of("type", "number", "airbyte_type", "integer")); + final JsonNode stringType = Jsons.jsonNode(ImmutableMap.of("type", "string")); + properties.set(CDC_LSN, stringType); + properties.set(CDC_UPDATED_AT, stringType); + properties.set(CDC_DELETED_AT, stringType); + properties.set(CDC_EVENT_SERIAL_NO, stringType); + properties.set(CDC_DEFAULT_CURSOR, airbyteIntegerType); + + } + + @Override + protected void addCdcDefaultCursorField(final AirbyteStream stream) { + if 
(stream.getSupportedSyncModes().contains(SyncMode.INCREMENTAL)) { + stream.setDefaultCursorField(ImmutableList.of(CDC_DEFAULT_CURSOR)); + } + } + + @Override + protected void assertExpectedStateMessages(final List stateMessages) { + assertEquals(7, stateMessages.size()); + assertStateTypes(stateMessages, 4); + } + + @Override + protected void assertExpectedStateMessagesFromIncrementalSync(final List stateMessages) { + assertEquals(1, stateMessages.size()); + assertNotNull(stateMessages.get(0).getData()); + for (final AirbyteStateMessage stateMessage : stateMessages) { + assertNotNull(stateMessage.getData().get("cdc_state").get("state").get(MSSQL_CDC_OFFSET)); + assertNotNull(stateMessage.getData().get("cdc_state").get("state").get(MSSQL_DB_HISTORY)); + } + } + + @Override + protected void assertExpectedStateMessagesForNoData(final List stateMessages) { + assertEquals(2, stateMessages.size()); + } + + @Override + protected void assertExpectedStateMessagesForRecordsProducedDuringAndAfterSync(final List stateAfterFirstBatch) { + assertEquals(27, stateAfterFirstBatch.size()); + assertStateTypes(stateAfterFirstBatch, 24); + } + + private void assertStateTypes(final List stateMessages, final int indexTillWhichExpectOcState) { + JsonNode sharedState = null; + LOGGER.info("*** states to assert: {}", Arrays.deepToString(stateMessages.toArray())); + for (int i = 0; i < stateMessages.size(); i++) { + final AirbyteStateMessage stateMessage = stateMessages.get(i); + assertEquals(AirbyteStateType.GLOBAL, stateMessage.getType()); + final AirbyteGlobalState global = stateMessage.getGlobal(); + assertNotNull(global.getSharedState()); + if (Objects.isNull(sharedState)) { + sharedState = global.getSharedState(); + } else { + assertEquals(sharedState, global.getSharedState(), "states were " + Arrays.deepToString(stateMessages.toArray())); + // assertEquals(sharedState.toString().replaceAll("ts_ms\\\\\":\\d+", ""), + // global.getSharedState().toString().replaceAll("ts_ms\\\\\":\\d+", 
"")); + } + assertEquals(1, global.getStreamStates().size()); + final AirbyteStreamState streamState = global.getStreamStates().get(0); + if (i <= indexTillWhichExpectOcState) { + assertTrue(streamState.getStreamState().has(STATE_TYPE_KEY)); + assertEquals(ORDERED_COL_STATE_TYPE, streamState.getStreamState().get(STATE_TYPE_KEY).asText()); + } else { + assertFalse(streamState.getStreamState().has(STATE_TYPE_KEY)); + } + } + } + + @Override + protected void compareTargetPositionFromTheRecordsWithTargetPostionGeneratedBeforeSync(final CdcTargetPosition targetPosition, + final AirbyteRecordMessage record) { + // The LSN from records should be either equal or grater than the position value before the sync + // started. + // Since we're using shared containers, the current LSN can move forward without any data + // modifications + // (INSERT, UPDATE, DELETE) in the current DB + assert targetPosition instanceof MssqlCdcTargetPosition; + assertTrue(extractPosition(record.getData()).targetLsn.compareTo(((MssqlCdcTargetPosition) targetPosition).targetLsn) >= 0); + } + + protected void waitForCdcRecords(String schemaName, String tableName, int recordCount) + throws Exception { + testdb.waitForCdcRecords(schemaName, tableName, recordCount); + } + + protected void deleteCommand(final String streamName) { + String selectCountSql = "SELECT COUNT(*) FROM %s.%s".formatted(modelsSchema(), streamName); + try { + int rowCount = testdb.query(ctx -> ctx.fetch(selectCountSql)).get(0).get(0, Integer.class); + LOGGER.info("deleting all {} rows from table {}.{}", rowCount, modelsSchema(), streamName); + super.deleteCommand(streamName); + } catch (SQLException e) { + throw new RuntimeException(e); + } + } + + @Override + protected boolean supportResumableFullRefresh() { + return true; + } + + @Override + protected void assertExpectedStateMessagesForFullRefresh(final List stateMessages) { + // Full refresh will only send 6 state messages - one for each record (including the final one). 
+ assertEquals(6, stateMessages.size()); + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSslSourceTest.java b/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSslSourceTest.java new file mode 100644 index 0000000000000..98358d652d3c1 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSslSourceTest.java @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql; + +import static io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants.SYNC_CHECKPOINT_RECORDS_PROPERTY; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.db.factory.DataSourceFactory; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.integrations.JdbcConnector; +import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage; +import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.CertificateKey; +import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.ContainerModifier; +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.Map; +import javax.sql.DataSource; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.TestInstance.Lifecycle; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; + +@TestInstance(Lifecycle.PER_METHOD) +@Execution(ExecutionMode.CONCURRENT) +@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "NP_NULL_ON_SOME_PATH") +public class CdcMssqlSslSourceTest extends CdcMssqlSourceTest { + + @Override + final protected MsSQLTestDatabase createTestDatabase() { + final var testdb = MsSQLTestDatabase.in(BaseImage.MSSQL_2022, ContainerModifier.AGENT, ContainerModifier.WITH_SSL_CERTIFICATES); + return testdb.withWaitUntilAgentRunning() 
+ .withCdc(); + } + + @Override + protected DataSource createTestDataSource() { + return DataSourceFactory.create( + testUserName(), + testdb.getPassword(), + testdb.getDatabaseDriver().getDriverClassName(), + testdb.getJdbcUrl(), + Map.of("encrypt", "true", "databaseName", testdb.getDatabaseName(), "trustServerCertificate", "true"), + JdbcConnector.CONNECT_TIMEOUT_DEFAULT); + } + + @Override + protected JsonNode config() { + final String containerIp; + try { + containerIp = InetAddress.getByName(testdb.getContainer().getHost()) + .getHostAddress(); + } catch (final UnknownHostException e) { + throw new RuntimeException(e); + } + final String certificate = testdb.getCertificate(CertificateKey.SERVER); + return testdb.configBuilder() + .withEncrytedVerifyServerCertificate(certificate, testdb.getContainer().getHost()) + .with(JdbcUtils.HOST_KEY, containerIp) + .with(JdbcUtils.PORT_KEY, testdb.getContainer().getFirstMappedPort()) + .withDatabase() + .with(JdbcUtils.USERNAME_KEY, testUserName()) + .with(JdbcUtils.PASSWORD_KEY, testdb.getPassword()) + .withSchemas(modelsSchema(), randomSchema()) + .withCdcReplication() + .with(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1) + .build(); + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/CdcStateCompressionTest.java b/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/CdcStateCompressionTest.java new file mode 100644 index 0000000000000..f919478751fcd --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/CdcStateCompressionTest.java @@ -0,0 +1,266 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mssql; + +import static io.airbyte.integrations.source.mssql.MssqlSource.IS_COMPRESSED; +import static io.airbyte.integrations.source.mssql.MssqlSource.MSSQL_CDC_OFFSET; +import static io.airbyte.integrations.source.mssql.MssqlSource.MSSQL_DB_HISTORY; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.integrations.source.relationaldb.state.StateGeneratorUtils; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.util.AutoCloseableIterators; +import io.airbyte.protocol.models.Field; +import io.airbyte.protocol.models.JsonSchemaType; +import io.airbyte.protocol.models.v0.AirbyteCatalog; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.AirbyteMessage.Type; +import io.airbyte.protocol.models.v0.AirbyteRecordMessage; +import io.airbyte.protocol.models.v0.AirbyteStateMessage; +import io.airbyte.protocol.models.v0.AirbyteStream; +import io.airbyte.protocol.models.v0.CatalogHelpers; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.v0.SyncMode; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.stream.Collectors; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class 
CdcStateCompressionTest { + + private static final Logger LOGGER = LoggerFactory.getLogger(CdcStateCompressionTest.class); + + static private final String CDC_ROLE_NAME = "cdc_selector"; + + static private final String TEST_USER_NAME_PREFIX = "cdc_test_user"; + + static private final String TEST_SCHEMA = "test_schema"; + + static private final int TEST_TABLES = 4; + + // SQLServer tables can't have more than 1024 columns. + static private final int ADDED_COLUMNS = 1000; + + private MsSQLTestDatabase testdb; + private final ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); + private static final String ALTER_TABLE_ADD_COLUMN_SQL; + static { + StringBuilder sb = new StringBuilder(); + sb.append("ALTER TABLE ").append(TEST_SCHEMA).append(".%s ADD"); + for (int j = 0; j < ADDED_COLUMNS; j++) { + sb.append((j > 0) ? ", " : " ") + // Sqlserver column names can't be longer than 128 characters + .append("rather_long_column_name_________________________________________________________________________________________").append(j) + .append(" INT NULL"); + } + ALTER_TABLE_ADD_COLUMN_SQL = sb.toString(); + } + + @BeforeEach + public void setup() throws Exception { + testdb = MsSQLTestDatabase.in(MsSQLTestDatabase.BaseImage.MSSQL_2022, MsSQLTestDatabase.ContainerModifier.AGENT) + .withWaitUntilAgentRunning() + .withCdc(); + + // Create a test schema and a bunch of test tables with CDC enabled. + // Insert one row in each table so that they're not empty. 
+ testdb.with("CREATE SCHEMA %s;", TEST_SCHEMA); + List> createAndPopulateTableTasks = new ArrayList<>(); + List> waitForCdcRecordTasks = new ArrayList<>(); + List> alterTabletasks = new ArrayList<>(); + List> enableTableCdctasks = new ArrayList<>(); + List> disableTableCdctasks = new ArrayList<>(); + + for (int i = 0; i < TEST_TABLES; i++) { + String tableName = "test_table_%d".formatted(i); + String initialCdcInstanceName = "capture_instance_%d_%d".formatted(i, 1); + String finalCdcInstanceName = "capture_instance_%d_%d".formatted(i, 2); + createAndPopulateTableTasks.add(() -> testdb + .with("CREATE TABLE %s.%s (id INT IDENTITY(1,1) PRIMARY KEY);", TEST_SCHEMA, tableName) + .withCdcForTable(TEST_SCHEMA, tableName, CDC_ROLE_NAME, initialCdcInstanceName) + .with("INSERT INTO %s.%s DEFAULT VALUES", TEST_SCHEMA, tableName)); + waitForCdcRecordTasks.add(() -> testdb.waitForCdcRecords(TEST_SCHEMA, tableName, initialCdcInstanceName, 1)); + + // Increase schema history size to trigger state compression. + // We do this by adding lots of columns with long names, + // then migrating to a new CDC capture instance for each table. + // This is admittedly somewhat awkward and perhaps could be improved. + alterTabletasks.add(() -> testdb.with(ALTER_TABLE_ADD_COLUMN_SQL.formatted(tableName))); + enableTableCdctasks.add(() -> testdb.withCdcForTable(TEST_SCHEMA, tableName, CDC_ROLE_NAME, finalCdcInstanceName)); + disableTableCdctasks.add(() -> testdb.withCdcDisabledForTable(TEST_SCHEMA, tableName, initialCdcInstanceName)); + } + executor.invokeAll(createAndPopulateTableTasks); + executor.invokeAll(waitForCdcRecordTasks); + + // Create a test user to be used by the source, with proper permissions. 
+ testdb + .with("CREATE LOGIN %s WITH PASSWORD = '%s', DEFAULT_DATABASE = %s", testUserName(), testdb.getPassword(), testdb.getDatabaseName()) + .with("CREATE USER %s FOR LOGIN %s WITH DEFAULT_SCHEMA = [dbo]", testUserName(), testUserName()) + .with("REVOKE ALL FROM %s CASCADE;", testUserName()) + .with("EXEC sp_msforeachtable \"REVOKE ALL ON '?' TO %s;\"", testUserName()) + .with("GRANT SELECT ON SCHEMA :: [%s] TO %s", TEST_SCHEMA, testUserName()) + .with("GRANT SELECT ON SCHEMA :: [cdc] TO %s", testUserName()) + .with("USE [master]") + .with("GRANT VIEW SERVER STATE TO %s", testUserName()) + .with("USE [%s]", testdb.getDatabaseName()) + .with("EXEC sp_addrolemember N'%s', N'%s';", CDC_ROLE_NAME, testUserName()); + executor.invokeAll(alterTabletasks); + executor.invokeAll(enableTableCdctasks); + executor.invokeAll(disableTableCdctasks); + } + + private AirbyteCatalog getCatalog() { + final var streams = new ArrayList(); + for (int i = 0; i < TEST_TABLES; i++) { + streams.add(CatalogHelpers.createAirbyteStream( + "test_table_%d".formatted(i), + TEST_SCHEMA, + Field.of("id", JsonSchemaType.INTEGER)) + .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + .withSourceDefinedPrimaryKey(List.of(List.of("id")))); + } + return new AirbyteCatalog().withStreams(streams); + } + + private ConfiguredAirbyteCatalog getConfiguredCatalog() { + final var configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(getCatalog()); + configuredCatalog.getStreams().forEach(s -> s.setSyncMode(SyncMode.INCREMENTAL)); + return configuredCatalog; + } + + private MssqlSource source() { + return new MssqlSource(); + } + + private JsonNode config() { + return testdb.configBuilder() + .withHostAndPort() + .withDatabase() + .with(JdbcUtils.USERNAME_KEY, testUserName()) + .with(JdbcUtils.PASSWORD_KEY, testdb.getPassword()) + .withSchemas(TEST_SCHEMA) + .withoutSsl() + // Configure for CDC replication but with a higher timeout than usual. 
+ // This is because Debezium requires more time than usual to build the initial snapshot. + .with("is_test", true) + .with("replication_method", Map.of( + "method", "CDC", + "initial_waiting_seconds", 20)) + .build(); + } + + private String testUserName() { + return testdb.withNamespace(TEST_USER_NAME_PREFIX); + } + + /** + * This test is similar in principle to CdcMysqlSourceTest.testCompressedSchemaHistory. + */ + @Test + public void testCompressedSchemaHistory() throws Exception { + // First sync. + final var firstBatchIterator = source().read(config(), getConfiguredCatalog(), null); + final var dataFromFirstBatch = AutoCloseableIterators.toListAndClose(firstBatchIterator); + final AirbyteStateMessage lastStateMessageFromFirstBatch = + StateGeneratorUtils.convertLegacyStateToGlobalState(Iterables.getLast(extractStateMessages(dataFromFirstBatch))); + assertNotNull(lastStateMessageFromFirstBatch.getGlobal().getSharedState()); + final var lastSharedStateFromFirstBatch = lastStateMessageFromFirstBatch.getGlobal().getSharedState().get("state"); + assertNotNull(lastSharedStateFromFirstBatch); + assertNotNull(lastSharedStateFromFirstBatch.get(MSSQL_DB_HISTORY)); + assertNotNull(lastSharedStateFromFirstBatch.get(MSSQL_CDC_OFFSET)); + assertNotNull(lastSharedStateFromFirstBatch.get(IS_COMPRESSED)); + assertTrue(lastSharedStateFromFirstBatch.get(IS_COMPRESSED).asBoolean()); + final var recordsFromFirstBatch = extractRecordMessages(dataFromFirstBatch); + assertEquals(TEST_TABLES, recordsFromFirstBatch.size()); + for (final var record : recordsFromFirstBatch) { + assertEquals("1", record.getData().get("id").toString()); + } + + LOGGER.info("inserting new data into test tables"); + List> waitForCdcTasks = new ArrayList<>(); + // Insert a bunch of records (1 per table, again). 
+ for (int i = 0; i < TEST_TABLES; i++) { + String tableName = "test_table_%d".formatted(i); + String cdcInstanceName = "capture_instance_%d_%d".formatted(i, 2); + testdb.with("INSERT %s.%s DEFAULT VALUES;", TEST_SCHEMA, tableName); + waitForCdcTasks.add(() -> testdb.waitForCdcRecords(TEST_SCHEMA, tableName, cdcInstanceName, 1)); + } + LOGGER.info("waiting for CDC records"); + executor.invokeAll(waitForCdcTasks); + + LOGGER.info("starting second sync"); + // Second sync. + final var secondBatchStateForRead = Jsons.jsonNode(Collections.singletonList(Iterables.getLast(extractStateMessages(dataFromFirstBatch)))); + final var secondBatchIterator = source().read(config(), getConfiguredCatalog(), secondBatchStateForRead); + final var dataFromSecondBatch = AutoCloseableIterators.toListAndClose(secondBatchIterator); + final AirbyteStateMessage lastStateMessageFromSecondBatch = + StateGeneratorUtils.convertLegacyStateToGlobalState(Iterables.getLast(extractStateMessages(dataFromSecondBatch))); + assertNotNull(lastStateMessageFromSecondBatch.getGlobal().getSharedState()); + final var lastSharedStateFromSecondBatch = lastStateMessageFromSecondBatch.getGlobal().getSharedState().get("state"); + assertNotNull(lastSharedStateFromSecondBatch); + assertNotNull(lastSharedStateFromSecondBatch.get(MSSQL_DB_HISTORY)); + assertEquals(lastSharedStateFromFirstBatch.get(MSSQL_DB_HISTORY), lastSharedStateFromSecondBatch.get(MSSQL_DB_HISTORY)); + assertNotNull(lastSharedStateFromSecondBatch.get(MSSQL_CDC_OFFSET)); + assertNotNull(lastSharedStateFromSecondBatch.get(IS_COMPRESSED)); + assertTrue(lastSharedStateFromSecondBatch.get(IS_COMPRESSED).asBoolean()); + final var recordsFromSecondBatch = extractRecordMessages(dataFromSecondBatch); + assertEquals(TEST_TABLES, recordsFromSecondBatch.size()); + for (final var record : recordsFromSecondBatch) { + assertEquals("2", record.getData().get("id").toString()); + } + } + + @AfterEach + public void tearDown() { + testdb.close(); + } + + private Set 
extractRecordMessages(final List messages) { + final var recordsPerStream = extractRecordMessagesStreamWise(messages); + return recordsPerStream.values().stream().flatMap(Set::stream).collect(Collectors.toSet()); + } + + private Map> extractRecordMessagesStreamWise(final List messages) { + final var recordsPerStream = messages.stream() + .filter(m -> m.getType() == Type.RECORD) + .map(AirbyteMessage::getRecord) + .collect(Collectors.groupingBy(AirbyteRecordMessage::getStream)); + + final Map> recordsPerStreamWithNoDuplicates = new HashMap<>(); + for (final var entry : recordsPerStream.entrySet()) { + final var set = new HashSet<>(entry.getValue()); + recordsPerStreamWithNoDuplicates.put(entry.getKey(), set); + assertEquals(entry.getValue().size(), set.size(), "duplicate records in sync for " + entry.getKey()); + } + + return recordsPerStreamWithNoDuplicates; + } + + private List extractStateMessages(final List messages) { + return messages.stream() + .filter(r -> r.getType() == Type.STATE) + .map(AirbyteMessage::getState) + .toList(); + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/CloudDeploymentMssqlTest.java b/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/CloudDeploymentMssqlTest.java new file mode 100644 index 0000000000000..713b25728d5a2 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/CloudDeploymentMssqlTest.java @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mssql; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.integrations.base.Source; +import io.airbyte.cdk.integrations.base.ssh.SshBastionContainer; +import io.airbyte.cdk.integrations.base.ssh.SshTunnel; +import io.airbyte.commons.features.EnvVariableFeatureFlags; +import io.airbyte.commons.features.FeatureFlagsWrapper; +import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; + +@Execution(ExecutionMode.CONCURRENT) +public class CloudDeploymentMssqlTest { + + private MsSQLTestDatabase createTestDatabase(String... containerFactoryMethods) { + final var container = new MsSQLContainerFactory().shared( + BaseImage.MSSQL_2022.reference, containerFactoryMethods); + final var testdb = new MsSQLTestDatabase(container); + return testdb + .withConnectionProperty("encrypt", "true") + .withConnectionProperty("trustServerCertificate", "true") + .withConnectionProperty("databaseName", testdb.getDatabaseName()) + .initialized(); + } + + private Source source() { + final var source = new MssqlSource(); + source.setFeatureFlags(FeatureFlagsWrapper.overridingDeploymentMode(new EnvVariableFeatureFlags(), "CLOUD")); + return MssqlSource.sshWrappedSource(source); + } + + @Test + void testStrictSSLUnsecuredNoTunnel() throws Exception { + try (final var testdb = createTestDatabase()) { + final var config = testdb.configBuilder() + .withHostAndPort() + .withDatabase() + .with(JdbcUtils.USERNAME_KEY, testdb.getUserName()) + .with(JdbcUtils.PASSWORD_KEY, "fake") + .withoutSsl() + .with("tunnel_method", 
ImmutableMap.builder().put("tunnel_method", "NO_TUNNEL").build()) + .build(); + final AirbyteConnectionStatus actual = source().check(config); + assertEquals(AirbyteConnectionStatus.Status.FAILED, actual.getStatus()); + assertTrue(actual.getMessage().contains("Unsecured connection not allowed"), actual.getMessage()); + } + } + + @Test + void testStrictSSLSecuredNoTunnel() throws Exception { + try (final var testdb = createTestDatabase()) { + final var config = testdb.testConfigBuilder() + .withEncrytedTrustServerCertificate() + .with("tunnel_method", ImmutableMap.builder().put("tunnel_method", "NO_TUNNEL").build()) + .build(); + final AirbyteConnectionStatus actual = source().check(config); + assertEquals(AirbyteConnectionStatus.Status.SUCCEEDED, actual.getStatus()); + } + } + + @Test + void testStrictSSLSecuredWithTunnel() throws Exception { + try (final var testdb = createTestDatabase()) { + final var config = testdb.configBuilder() + .withHostAndPort() + .withDatabase() + .with(JdbcUtils.USERNAME_KEY, testdb.getUserName()) + .with(JdbcUtils.PASSWORD_KEY, "fake") + .withEncrytedTrustServerCertificate() + .with("tunnel_method", ImmutableMap.builder().put("tunnel_method", "SSH_KEY_AUTH").build()) + .build(); + final AirbyteConnectionStatus actual = source().check(config); + assertEquals(AirbyteConnectionStatus.Status.FAILED, actual.getStatus()); + assertTrue(actual.getMessage().contains("Could not connect with provided SSH configuration."), actual.getMessage()); + } + } + + @Test + void testStrictSSLUnsecuredWithTunnel() throws Exception { + try (final var testdb = createTestDatabase()) { + final var config = testdb.configBuilder() + .withHostAndPort() + .withDatabase() + .with(JdbcUtils.USERNAME_KEY, testdb.getUserName()) + .with(JdbcUtils.PASSWORD_KEY, "fake") + .withEncrytedTrustServerCertificate() + .with("tunnel_method", ImmutableMap.builder().put("tunnel_method", "SSH_KEY_AUTH").build()) + .build(); + final AirbyteConnectionStatus actual = 
source().check(config); + assertEquals(AirbyteConnectionStatus.Status.FAILED, actual.getStatus()); + assertTrue(actual.getMessage().contains("Could not connect with provided SSH configuration."), actual.getMessage()); + } + } + + @Test + void testCheckWithSslModeDisabled() throws Exception { + try (final var testdb = createTestDatabase("withNetwork")) { + try (final SshBastionContainer bastion = new SshBastionContainer()) { + bastion.initAndStartBastion(testdb.getContainer().getNetwork()); + final var config = testdb.integrationTestConfigBuilder() + .with("tunnel_method", bastion.getTunnelMethod(SshTunnel.TunnelMethod.SSH_PASSWORD_AUTH, false)) + .withoutSsl() + .build(); + final AirbyteConnectionStatus actual = source().check(config); + assertEquals(AirbyteConnectionStatus.Status.SUCCEEDED, actual.getStatus()); + } + } + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlAgentStateTest.java b/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlAgentStateTest.java new file mode 100644 index 0000000000000..968b799d2c55f --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlAgentStateTest.java @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mssql; + +import static io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants.SYNC_CHECKPOINT_RECORDS_PROPERTY; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.db.factory.DataSourceFactory; +import io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase; +import io.airbyte.cdk.db.jdbc.JdbcDatabase; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.integrations.JdbcConnector; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; +import java.util.Map; +import javax.sql.DataSource; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.testcontainers.containers.MSSQLServerContainer; + +public class MssqlAgentStateTest { + + private static MsSQLTestDatabase testdb; + private static DataSource testDataSource; + private static MSSQLServerContainer privateContainer; + + @BeforeAll + public static void setup() { + privateContainer = new MsSQLContainerFactory().exclusive( + MsSQLTestDatabase.BaseImage.MSSQL_2022.reference, + MsSQLTestDatabase.ContainerModifier.AGENT); + testdb = new MsSQLTestDatabase(privateContainer); + testdb + .withConnectionProperty("encrypt", "false") + .withConnectionProperty("trustServerCertificate", "true") + .withConnectionProperty("databaseName", testdb.getDatabaseName()) + .initialized() + .withWaitUntilAgentRunning() + .withCdc(); + testDataSource = DataSourceFactory.create( + testdb.getUserName(), + testdb.getPassword(), + testdb.getDatabaseDriver().getDriverClassName(), + testdb.getJdbcUrl(), + Map.of("encrypt", "false", "trustServerCertificate", "true"), + JdbcConnector.CONNECT_TIMEOUT_DEFAULT); + } + + @AfterAll + static void tearDown() { + try { + DataSourceFactory.close(testDataSource); 
+ testdb.close(); + } catch (Exception e) { + throw new RuntimeException(e); + } + privateContainer.close(); + } + + protected MssqlSource source() { + return new MssqlSource(); + } + + private JdbcDatabase testDatabase() { + return new DefaultJdbcDatabase(testDataSource); + } + + protected JsonNode config() { + return testdb.configBuilder() + .withHostAndPort() + .withDatabase() + .with(JdbcUtils.USERNAME_KEY, testdb.getUserName()) + .with(JdbcUtils.PASSWORD_KEY, testdb.getPassword()) + .withCdcReplication() + .withoutSsl() + .with(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1) + .build(); + } + + @Test + void testAssertSqlServerAgentRunning() throws Exception { + testdb.withAgentStopped().withWaitUntilAgentStopped(); + // assert expected failure if sql server agent stopped + assertThrows(RuntimeException.class, + () -> source().assertSqlServerAgentRunning(testDatabase())); + // assert success if sql server agent running + testdb.withAgentStarted().withWaitUntilAgentRunning(); + assertDoesNotThrow(() -> source().assertSqlServerAgentRunning(testDatabase())); + } + + // Ensure the CDC check operations are included when CDC is enabled + // todo: make this better by checking the returned checkOperations from source.getCheckOperations + @Test + void testCdcCheckOperations() throws Exception { + // assertCdcEnabledInDb + testdb.withoutCdc(); + AirbyteConnectionStatus status = source().check(config()); + assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.FAILED); + testdb.withCdc(); + // assertCdcSchemaQueryable + testdb.with("REVOKE SELECT ON SCHEMA :: [cdc] TO %s", testdb.getUserName()); + status = source().check(config()); + assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.FAILED); + testdb.with("GRANT SELECT ON SCHEMA :: [cdc] TO %s", testdb.getUserName()); + + // assertSqlServerAgentRunning + + testdb.withAgentStopped().withWaitUntilAgentStopped(); + status = source().check(config()); + assertEquals(status.getStatus(), 
AirbyteConnectionStatus.Status.FAILED); + testdb.withAgentStarted().withWaitUntilAgentRunning(); + status = source().check(config()); + assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.FAILED); + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlCdcHelperTest.java b/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlCdcHelperTest.java new file mode 100644 index 0000000000000..d1ec53fe19157 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlCdcHelperTest.java @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.commons.json.Jsons; +import java.util.Map; +import org.junit.jupiter.api.Test; + +class MssqlCdcHelperTest { + + private static final JsonNode LEGACY_NON_CDC_CONFIG = Jsons.jsonNode(Map.of("replication_method", "STANDARD")); + private static final JsonNode LEGACY_CDC_CONFIG = Jsons.jsonNode(Map.of("replication_method", "CDC")); + + @Test + public void testIsCdc() { + // legacy replication method config before version 0.4.0 + assertFalse(MssqlCdcHelper.isCdc(LEGACY_NON_CDC_CONFIG)); + assertTrue(MssqlCdcHelper.isCdc(LEGACY_CDC_CONFIG)); + + // new replication method config since version 0.4.0 + final JsonNode newNonCdc = Jsons.jsonNode(Map.of("replication_method", + Jsons.jsonNode(Map.of("method", "STANDARD")))); + assertFalse(MssqlCdcHelper.isCdc(newNonCdc)); + + final JsonNode newCdc = Jsons.jsonNode(Map.of("replication_method", + Jsons.jsonNode(Map.of( + "method", "CDC")))); + assertTrue(MssqlCdcHelper.isCdc(newCdc)); + + // migration from legacy to new config + final 
JsonNode mixNonCdc = Jsons.jsonNode(Map.of( + "replication_method", Jsons.jsonNode(Map.of("method", "STANDARD")), + "replication", Jsons.jsonNode(Map.of("replication_type", "CDC")))); + assertFalse(MssqlCdcHelper.isCdc(mixNonCdc)); + + final JsonNode mixCdc = Jsons.jsonNode(Map.of( + "replication", Jsons.jsonNode(Map.of( + "replication_type", "Standard")), + "replication_method", Jsons.jsonNode(Map.of( + "method", "CDC")))); + assertTrue(MssqlCdcHelper.isCdc(mixCdc)); + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlDataSourceFactoryTest.java b/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlDataSourceFactoryTest.java new file mode 100644 index 0000000000000..64d2fae404d25 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlDataSourceFactoryTest.java @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mssql; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import com.zaxxer.hikari.HikariDataSource; +import io.airbyte.cdk.db.factory.DataSourceFactory; +import java.util.Map; +import javax.sql.DataSource; +import org.junit.jupiter.api.Test; + +public class MssqlDataSourceFactoryTest { + + @Test + protected void testCreatingDataSourceWithConnectionTimeoutSetBelowDefault() { + try (var testdb = MsSQLTestDatabase.in(MsSQLTestDatabase.BaseImage.MSSQL_2022)) { + final Map connectionProperties = Map.of("loginTimeout", String.valueOf(5)); + final DataSource dataSource = DataSourceFactory.create( + testdb.getUserName(), + testdb.getPassword(), + testdb.getDatabaseDriver().getDriverClassName(), + testdb.getJdbcUrl(), + connectionProperties, + new MssqlSource().getConnectionTimeoutMssql(connectionProperties)); + assertNotNull(dataSource); + assertEquals(HikariDataSource.class, dataSource.getClass()); + assertEquals(5000, ((HikariDataSource) dataSource).getHikariConfigMXBean().getConnectionTimeout()); + } + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlDebeziumStateUtilTest.java b/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlDebeziumStateUtilTest.java new file mode 100644 index 0000000000000..56125b994ab46 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlDebeziumStateUtilTest.java @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mssql; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil; +import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil.MssqlDebeziumStateAttributes; +import io.debezium.connector.sqlserver.Lsn; +import org.junit.jupiter.api.Test; + +public class MssqlDebeziumStateUtilTest { + + private static String DB_NAME = "db_name"; + private static String LSN_STRING = "0000062d:00017ff0:016d"; + private static Lsn LSN = Lsn.valueOf(LSN_STRING); + + @Test + void generateCorrectFormat() { + MssqlDebeziumStateAttributes attributes = new MssqlDebeziumStateAttributes(LSN); + JsonNode formatResult = MssqlDebeziumStateUtil.format(attributes, DB_NAME); + assertEquals("{\"commit_lsn\":\"0000062d:00017ff0:016d\",\"snapshot\":true,\"snapshot_completed\":true}", + formatResult.get("[\"db_name\",{\"server\":\"db_name\",\"database\":\"db_name\"}]").asText()); + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlInitialLoadHandlerTest.java b/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlInitialLoadHandlerTest.java new file mode 100644 index 0000000000000..8676c3c592bf7 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlInitialLoadHandlerTest.java @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mssql; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import io.airbyte.integrations.source.mssql.MssqlQueryUtils.TableSizeInfo; +import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadHandler; +import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair; +import org.junit.jupiter.api.Test; + +public class MssqlInitialLoadHandlerTest { + + private static final long ONE_GB = 1_073_741_824; + private static final long ONE_MB = 1_048_576; + + @Test + void testInvalidOrNullTableSizeInfo() { + final AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair("table_name", "schema_name"); + assertEquals(MssqlInitialLoadHandler.calculateChunkSize(null, pair), 1_000_000L); + final TableSizeInfo invalidRowLengthInfo = new TableSizeInfo(ONE_GB, 0L); + assertEquals(MssqlInitialLoadHandler.calculateChunkSize(invalidRowLengthInfo, pair), 1_000_000L); + final TableSizeInfo invalidTableSizeInfo = new TableSizeInfo(0L, 0L); + assertEquals(MssqlInitialLoadHandler.calculateChunkSize(invalidTableSizeInfo, pair), 1_000_000L); + } + + @Test + void testTableSizeInfo() { + final AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair("table_name", "schema_name"); + assertEquals(MssqlInitialLoadHandler.calculateChunkSize(new TableSizeInfo(ONE_GB, 2 * ONE_MB), pair), 512L); + assertEquals(MssqlInitialLoadHandler.calculateChunkSize(new TableSizeInfo(ONE_GB, 200L), pair), 5368709L); + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlJdbcSourceAcceptanceTest.java new file mode 100644 index 0000000000000..f35ee12c8c654 --- /dev/null +++ 
b/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlJdbcSourceAcceptanceTest.java @@ -0,0 +1,482 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql; + +import static io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants.SYNC_CHECKPOINT_RECORDS_PROPERTY; +import static io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadStateManager.STATE_TYPE_KEY; +import static java.util.stream.Collectors.toList; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.integrations.source.jdbc.test.JdbcSourceAcceptanceTest; +import io.airbyte.cdk.integrations.source.relationaldb.models.CursorBasedStatus; +import io.airbyte.cdk.integrations.source.relationaldb.models.DbStreamState; +import io.airbyte.cdk.integrations.source.relationaldb.models.InternalModels.StateType; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.util.MoreIterators; +import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage; +import io.airbyte.protocol.models.Field; +import io.airbyte.protocol.models.JsonSchemaType; +import io.airbyte.protocol.models.v0.AirbyteCatalog; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.AirbyteMessage.Type; +import io.airbyte.protocol.models.v0.AirbyteStateMessage; +import io.airbyte.protocol.models.v0.AirbyteStateMessage.AirbyteStateType; +import 
io.airbyte.protocol.models.v0.AirbyteStateStats; +import io.airbyte.protocol.models.v0.AirbyteStream; +import io.airbyte.protocol.models.v0.AirbyteStreamState; +import io.airbyte.protocol.models.v0.CatalogHelpers; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.v0.DestinationSyncMode; +import io.airbyte.protocol.models.v0.StreamDescriptor; +import io.airbyte.protocol.models.v0.SyncMode; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "NP_NULL_ON_SOME_PATH") +public class MssqlJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { + + protected static final String USERNAME_WITHOUT_PERMISSION = "new_user"; + protected static final String PASSWORD_WITHOUT_PERMISSION = "password_3435!"; + + static { + // In mssql, timestamp is generated automatically, so we need to use + // the datetime type instead so that we can set the value manually. 
+ COL_TIMESTAMP_TYPE = "DATETIME2"; + } + + @Override + protected JsonNode config() { + return testdb.testConfigBuilder() + .withoutSsl() + .build(); + } + + @Override + protected MssqlSource source() { + return new MssqlSource(); + } + + @Override + protected MsSQLTestDatabase createTestDatabase() { + return MsSQLTestDatabase.in(BaseImage.MSSQL_2022); + } + + @Override + public boolean supportsSchemas() { + return true; + } + + @Override + protected void maybeSetShorterConnectionTimeout(final JsonNode config) { + ((ObjectNode) config).put(JdbcUtils.JDBC_URL_PARAMS_KEY, "loginTimeout=1"); + } + + @Test + void testCheckIncorrectPasswordFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); + ((ObjectNode) config).put(JdbcUtils.PASSWORD_KEY, "fake"); + final AirbyteConnectionStatus status = source().check(config); + Assertions.assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); + assertTrue(status.getMessage().contains("State code: S0001; Error code: 18456;"), status.getMessage()); + } + + @Test + public void testCheckIncorrectUsernameFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); + ((ObjectNode) config).put(JdbcUtils.USERNAME_KEY, "fake"); + final AirbyteConnectionStatus status = source().check(config); + Assertions.assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); + assertTrue(status.getMessage().contains("State code: S0001; Error code: 18456;"), status.getMessage()); + } + + @Test + public void testCheckIncorrectHostFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); + ((ObjectNode) config).put(JdbcUtils.HOST_KEY, "localhost2"); + final AirbyteConnectionStatus status = source().check(config); + Assertions.assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); + assertTrue(status.getMessage().contains("State code: 08S01;"), status.getMessage()); + } 
+ + @Test + public void testCheckIncorrectPortFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); + ((ObjectNode) config).put(JdbcUtils.PORT_KEY, "0000"); + final AirbyteConnectionStatus status = source().check(config); + Assertions.assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); + assertTrue(status.getMessage().contains("State code: 08S01;"), status.getMessage()); + } + + @Test + public void testCheckIncorrectDataBaseFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); + ((ObjectNode) config).put(JdbcUtils.DATABASE_KEY, "wrongdatabase"); + final AirbyteConnectionStatus status = source().check(config); + Assertions.assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); + assertTrue(status.getMessage().contains("State code: S0001; Error code: 4060;"), status.getMessage()); + } + + @Test + public void testUserHasNoPermissionToDataBase() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); + testdb.with("CREATE LOGIN %s WITH PASSWORD = '%s'; ", USERNAME_WITHOUT_PERMISSION, PASSWORD_WITHOUT_PERMISSION); + ((ObjectNode) config).put(JdbcUtils.USERNAME_KEY, USERNAME_WITHOUT_PERMISSION); + ((ObjectNode) config).put(JdbcUtils.PASSWORD_KEY, PASSWORD_WITHOUT_PERMISSION); + final AirbyteConnectionStatus status = source().check(config); + assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); + assertTrue(status.getMessage().contains("State code: S0001; Error code: 4060;"), status.getMessage()); + } + + @Test + @Override + protected void testReadMultipleTablesIncrementally() throws Exception { + final var config = config(); + ((ObjectNode) config).put(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1); + final String streamOneName = TABLE_NAME + "one"; + // Create a fresh first table + testdb.with("CREATE TABLE %s (\n" + + " id INT NOT NULL PRIMARY KEY,\n" + + " name VARCHAR(50) NOT 
NULL,\n" + + " updated_at DATE NOT NULL\n" + + ");", getFullyQualifiedTableName(streamOneName)) + .with("INSERT INTO %s(id, name, updated_at) VALUES (1, 'picard', '2004-10-19')", + getFullyQualifiedTableName(streamOneName)) + .with("INSERT INTO %s(id, name, updated_at) VALUES (2, 'crusher', '2005-10-19')", + getFullyQualifiedTableName(streamOneName)) + .with("INSERT INTO %s(id, name, updated_at) VALUES (3, 'vash', '2006-10-19')", + getFullyQualifiedTableName(streamOneName)); + + // Create a fresh second table + final String streamTwoName = TABLE_NAME + "two"; + final String streamTwoFullyQualifiedName = getFullyQualifiedTableName(streamTwoName); + // Insert records into second table + testdb.with("CREATE TABLE %s (\n" + + " id INT NOT NULL PRIMARY KEY,\n" + + " name VARCHAR(50) NOT NULL,\n" + + " updated_at DATE NOT NULL\n" + + ");", streamTwoFullyQualifiedName) + .with("INSERT INTO %s (id, name, updated_at) VALUES (40, 'Jean Luc','2006-10-19')", + streamTwoFullyQualifiedName) + .with("INSERT INTO %s (id, name, updated_at) VALUES (41, 'Groot', '2006-10-19')", + streamTwoFullyQualifiedName) + .with("INSERT INTO %s (id, name, updated_at) VALUES (42, 'Thanos','2006-10-19')", + streamTwoFullyQualifiedName); + + final List streamOneExpectedRecords = Arrays.asList( + createRecord(streamOneName, getDefaultNamespace(), Map + .of(COL_ID, ID_VALUE_1, + COL_NAME, "picard", + COL_UPDATED_AT, "2004-10-19")), + createRecord(streamOneName, getDefaultNamespace(), Map + .of(COL_ID, ID_VALUE_2, + COL_NAME, "crusher", + COL_UPDATED_AT, + "2005-10-19")), + createRecord(streamOneName, getDefaultNamespace(), Map + .of(COL_ID, ID_VALUE_3, + COL_NAME, "vash", + COL_UPDATED_AT, "2006-10-19"))); + + // Create records list that we expect to see in the state message + final List streamTwoExpectedRecords = Arrays.asList( + createRecord(streamTwoName, getDefaultNamespace(), ImmutableMap.of( + COL_ID, 40, + COL_NAME, "Jean Luc", + COL_UPDATED_AT, "2006-10-19")), + createRecord(streamTwoName, 
getDefaultNamespace(), ImmutableMap.of( + COL_ID, 41, + COL_NAME, "Groot", + COL_UPDATED_AT, "2006-10-19")), + createRecord(streamTwoName, getDefaultNamespace(), ImmutableMap.of( + COL_ID, 42, + COL_NAME, "Thanos", + COL_UPDATED_AT, "2006-10-19"))); + + // Prep and create a configured catalog to perform sync + final AirbyteStream streamOne = getAirbyteStream(streamOneName, getDefaultNamespace()); + final AirbyteStream streamTwo = getAirbyteStream(streamTwoName, getDefaultNamespace()); + + final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog( + new AirbyteCatalog().withStreams(List.of(streamOne, streamTwo))); + configuredCatalog.getStreams().forEach(airbyteStream -> { + airbyteStream.setSyncMode(SyncMode.INCREMENTAL); + airbyteStream.setCursorField(List.of(COL_ID)); + airbyteStream.setDestinationSyncMode(DestinationSyncMode.APPEND); + airbyteStream.withPrimaryKey(List.of(List.of(COL_ID))); + }); + + // Perform initial sync + final List messagesFromFirstSync = MoreIterators + .toList(source().read(config, configuredCatalog, null)); + + final List recordsFromFirstSync = filterRecords(messagesFromFirstSync); + + setEmittedAtToNull(messagesFromFirstSync); + // All records in the 2 configured streams should be present + assertThat(filterRecords(recordsFromFirstSync)).containsExactlyElementsOf( + Stream.concat(streamOneExpectedRecords.stream().parallel(), + streamTwoExpectedRecords.stream().parallel()).collect(toList())); + + final List actualFirstSyncState = extractStateMessage(messagesFromFirstSync); + // Since we are emitting a state message after each record, we should have 1 state for each record - + // 3 from stream1 and 3 from stream2 + assertEquals(6, actualFirstSyncState.size()); + + // The expected state type should be 2 ordered_column's and the last one being cursor_based + final List expectedStateTypesFromFirstSync = List.of("ordered_column", "ordered_column", "cursor_based"); + final List 
stateTypeOfStreamOneStatesFromFirstSync = + extractSpecificFieldFromCombinedMessages(messagesFromFirstSync, streamOneName, STATE_TYPE_KEY); + final List stateTypeOfStreamTwoStatesFromFirstSync = + extractSpecificFieldFromCombinedMessages(messagesFromFirstSync, streamTwoName, STATE_TYPE_KEY); + // It should be the same for stream1 and stream2 + assertEquals(stateTypeOfStreamOneStatesFromFirstSync, expectedStateTypesFromFirstSync); + assertEquals(stateTypeOfStreamTwoStatesFromFirstSync, expectedStateTypesFromFirstSync); + + // Create the expected ordered_column values that we should see + final List expectedOrderedColumnValueFromFirstSync = List.of("1", "2"); + final List orderedColumnValuesOfStreamOneFromFirstSync = + extractSpecificFieldFromCombinedMessages(messagesFromFirstSync, streamOneName, "ordered_col_val"); + final List orderedColumnValuesOfStreamTwoFromFirstSync = + extractSpecificFieldFromCombinedMessages(messagesFromFirstSync, streamOneName, "ordered_col_val"); + + // Verifying each element and its index to match. 
+ // Only checking the first 2 elements since we have verified that the last state_type is + // "cursor_based" + assertEquals(expectedOrderedColumnValueFromFirstSync.get(0), orderedColumnValuesOfStreamOneFromFirstSync.get(0)); + assertEquals(expectedOrderedColumnValueFromFirstSync.get(1), orderedColumnValuesOfStreamOneFromFirstSync.get(1)); + assertEquals(expectedOrderedColumnValueFromFirstSync.get(0), orderedColumnValuesOfStreamTwoFromFirstSync.get(0)); + assertEquals(expectedOrderedColumnValueFromFirstSync.get(1), orderedColumnValuesOfStreamTwoFromFirstSync.get(1)); + + // Extract only state messages for each stream + final List streamOneStateMessagesFromFirstSync = extractStateMessage(messagesFromFirstSync, streamOneName); + final List streamTwoStateMessagesFromFirstSync = extractStateMessage(messagesFromFirstSync, streamTwoName); + // Extract the incremental states of each stream's first and second state message + final List streamOneIncrementalStatesFromFirstSync = + List.of(streamOneStateMessagesFromFirstSync.get(0).getStream().getStreamState().get("incremental_state"), + streamOneStateMessagesFromFirstSync.get(1).getStream().getStreamState().get("incremental_state")); + final JsonNode streamOneFinalStreamStateFromFirstSync = streamOneStateMessagesFromFirstSync.get(2).getStream().getStreamState(); + + final List streamTwoIncrementalStatesFromFirstSync = + List.of(streamTwoStateMessagesFromFirstSync.get(0).getStream().getStreamState().get("incremental_state"), + streamTwoStateMessagesFromFirstSync.get(1).getStream().getStreamState().get("incremental_state")); + final JsonNode streamTwoFinalStreamStateFromFirstSync = streamTwoStateMessagesFromFirstSync.get(2).getStream().getStreamState(); + + // The incremental_state of each stream's first and second incremental states is expected + // to be identical to the stream_state of the final state message for each stream + assertEquals(streamOneIncrementalStatesFromFirstSync.get(0), 
streamOneFinalStreamStateFromFirstSync); + assertEquals(streamOneIncrementalStatesFromFirstSync.get(1), streamOneFinalStreamStateFromFirstSync); + assertEquals(streamTwoIncrementalStatesFromFirstSync.get(0), streamTwoFinalStreamStateFromFirstSync); + assertEquals(streamTwoIncrementalStatesFromFirstSync.get(1), streamTwoFinalStreamStateFromFirstSync); + + // Sync should work with an ordered_column state AND a cursor-based state from each stream + // Forcing a sync with + // - stream one state still being the first record read via Ordered column. + // - stream two state being the Ordered Column state before the final emitted state before the + // cursor + // switch + final List messagesFromSecondSyncWithMixedStates = MoreIterators + .toList(source().read(config, configuredCatalog, + Jsons.jsonNode(List.of(streamOneStateMessagesFromFirstSync.get(0), + streamTwoStateMessagesFromFirstSync.get(1))))); + + // Extract only state messages for each stream after second sync + final List streamOneStateMessagesFromSecondSync = + extractStateMessage(messagesFromSecondSyncWithMixedStates, streamOneName); + final List stateTypeOfStreamOneStatesFromSecondSync = + extractSpecificFieldFromCombinedMessages(messagesFromSecondSyncWithMixedStates, streamOneName, STATE_TYPE_KEY); + + final List streamTwoStateMessagesFromSecondSync = + extractStateMessage(messagesFromSecondSyncWithMixedStates, streamTwoName); + final List stateTypeOfStreamTwoStatesFromSecondSync = + extractSpecificFieldFromCombinedMessages(messagesFromSecondSyncWithMixedStates, streamTwoName, STATE_TYPE_KEY); + + // Stream One states after the second sync are expected to have 2 stream states + // - 1 with PrimaryKey state_type and 1 state that is of cursorBased state type + assertEquals(2, streamOneStateMessagesFromSecondSync.size()); + assertEquals(List.of("ordered_column", "cursor_based"), stateTypeOfStreamOneStatesFromSecondSync); + + // Stream Two states after the second sync are expected to have 1 stream state + // -
The state that is of cursorBased state type + assertEquals(1, streamTwoStateMessagesFromSecondSync.size()); + assertEquals(List.of("cursor_based"), stateTypeOfStreamTwoStatesFromSecondSync); + + // Add some data to each table and perform a third read. + // Expect to see all records be synced via cursorBased method and not ordered_column + testdb.with("INSERT INTO %s (id, name, updated_at) VALUES (4,'Hooper','2006-10-19')", + getFullyQualifiedTableName(streamOneName)) + .with("INSERT INTO %s (id, name, updated_at) VALUES (43, 'Iron Man', '2006-10-19')", + streamTwoFullyQualifiedName); + + final List messagesFromThirdSync = MoreIterators + .toList(source().read(config, configuredCatalog, + Jsons.jsonNode(List.of(streamOneStateMessagesFromSecondSync.get(1), + streamTwoStateMessagesFromSecondSync.get(0))))); + + // Extract only state messages, state type, and cursor for each stream after second sync + final List streamOneStateMessagesFromThirdSync = + extractStateMessage(messagesFromThirdSync, streamOneName); + final List stateTypeOfStreamOneStatesFromThirdSync = + extractSpecificFieldFromCombinedMessages(messagesFromThirdSync, streamOneName, STATE_TYPE_KEY); + final List cursorOfStreamOneStatesFromThirdSync = + extractSpecificFieldFromCombinedMessages(messagesFromThirdSync, streamOneName, "cursor"); + + final List streamTwoStateMessagesFromThirdSync = + extractStateMessage(messagesFromThirdSync, streamTwoName); + final List stateTypeOfStreamTwoStatesFromThirdSync = + extractSpecificFieldFromCombinedMessages(messagesFromThirdSync, streamTwoName, STATE_TYPE_KEY); + final List cursorOfStreamTwoStatesFromThirdSync = + extractSpecificFieldFromCombinedMessages(messagesFromThirdSync, streamTwoName, "cursor"); + + // Both streams should now be synced via standard cursor and have updated max cursor values + // cursor: 4 for stream one + // cursor: 43 for stream two + assertEquals(1, streamOneStateMessagesFromThirdSync.size()); + assertEquals(List.of("cursor_based"), 
stateTypeOfStreamOneStatesFromThirdSync); + assertEquals(List.of("4"), cursorOfStreamOneStatesFromThirdSync); + + assertEquals(1, streamTwoStateMessagesFromThirdSync.size()); + assertEquals(List.of("cursor_based"), stateTypeOfStreamTwoStatesFromThirdSync); + assertEquals(List.of("43"), cursorOfStreamTwoStatesFromThirdSync); + } + + private AirbyteStream getAirbyteStream(final String tableName, final String namespace) { + return CatalogHelpers.createAirbyteStream( + tableName, + namespace, + Field.of(COL_ID, JsonSchemaType.INTEGER), + Field.of(COL_NAME, JsonSchemaType.STRING), + Field.of(COL_UPDATED_AT, JsonSchemaType.STRING_DATE)) + .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + .withSourceDefinedPrimaryKey(List.of(List.of(COL_ID))); + } + + @Override + protected AirbyteCatalog getCatalog(final String defaultNamespace) { + return new AirbyteCatalog().withStreams(List.of( + CatalogHelpers.createAirbyteStream( + TABLE_NAME, + defaultNamespace, + Field.of(COL_ID, JsonSchemaType.INTEGER), + Field.of(COL_NAME, JsonSchemaType.STRING), + Field.of(COL_UPDATED_AT, JsonSchemaType.STRING_DATE)) + .withSupportedSyncModes(List.of(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + .withSourceDefinedPrimaryKey(List.of(List.of(COL_ID))) + .withIsResumable(true), + CatalogHelpers.createAirbyteStream( + TABLE_NAME_WITHOUT_PK, + defaultNamespace, + Field.of(COL_ID, JsonSchemaType.INTEGER), + Field.of(COL_NAME, JsonSchemaType.STRING), + Field.of(COL_UPDATED_AT, JsonSchemaType.STRING_DATE)) + .withSupportedSyncModes(List.of(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + .withSourceDefinedPrimaryKey(Collections.emptyList()) + .withIsResumable(false), + CatalogHelpers.createAirbyteStream( + TABLE_NAME_COMPOSITE_PK, + defaultNamespace, + Field.of(COL_FIRST_NAME, JsonSchemaType.STRING), + Field.of(COL_LAST_NAME, JsonSchemaType.STRING), + Field.of(COL_UPDATED_AT, JsonSchemaType.STRING_DATE)) + .withSupportedSyncModes(List.of(SyncMode.FULL_REFRESH, 
SyncMode.INCREMENTAL)) + .withSourceDefinedPrimaryKey( + List.of(List.of(COL_FIRST_NAME), List.of(COL_LAST_NAME))) + .withIsResumable(true))); + } + + @Override + protected DbStreamState buildStreamState(final ConfiguredAirbyteStream configuredAirbyteStream, + final String cursorField, + final String cursorValue) { + return new CursorBasedStatus().withStateType(StateType.CURSOR_BASED).withVersion(2L) + .withStreamName(configuredAirbyteStream.getStream().getName()) + .withStreamNamespace(configuredAirbyteStream.getStream().getNamespace()) + .withCursorField(List.of(cursorField)) + .withCursor(cursorValue) + .withCursorRecordCount(1L); + } + + // Override from parent class as we're no longer including the legacy Data field. + @Override + protected List createExpectedTestMessages(final List states, final long numRecords) { + return states.stream() + .map(s -> new AirbyteMessage().withType(Type.STATE) + .withState( + new AirbyteStateMessage().withType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState() + .withStreamDescriptor(new StreamDescriptor().withNamespace(s.getStreamNamespace()).withName(s.getStreamName())) + .withStreamState(Jsons.jsonNode(s))) + .withSourceStats(new AirbyteStateStats().withRecordCount((double) numRecords)))) + .collect( + Collectors.toList()); + } + + @Override + protected JsonNode getStateData(final AirbyteMessage airbyteMessage, final String streamName) { + final JsonNode streamState = airbyteMessage.getState().getStream().getStreamState(); + if (streamState.get("stream_name").asText().equals(streamName)) { + return streamState; + } + + throw new IllegalArgumentException("Stream not found in state message: " + streamName); + } + + @Override + protected List getExpectedAirbyteMessagesSecondSync(final String namespace) { + final List expectedMessages = new ArrayList<>(); + expectedMessages.addAll(List.of(createRecord(streamName(), namespace, ImmutableMap + .of(COL_ID, ID_VALUE_4, + COL_NAME, "riker", + COL_UPDATED_AT, 
"2006-10-19")), + createRecord(streamName(), namespace, ImmutableMap + .of(COL_ID, ID_VALUE_5, + COL_NAME, "data", + COL_UPDATED_AT, "2006-10-19")))); + final DbStreamState state = new CursorBasedStatus() + .withStateType(StateType.CURSOR_BASED) + .withVersion(2L) + .withStreamName(streamName()) + .withStreamNamespace(namespace) + .withCursorField(ImmutableList.of(COL_ID)) + .withCursor("5") + .withCursorRecordCount(1L); + + expectedMessages.addAll(createExpectedTestMessages(List.of(state), 2L)); + return expectedMessages; + } + + @Override + protected void validateFullRefreshStateMessageReadSuccess(final List stateMessages) { + var finalStateMessage = stateMessages.get(stateMessages.size() - 1); + assertEquals( + finalStateMessage.getStream().getStreamState().get("state_type").textValue(), + "ordered_column"); + assertEquals(finalStateMessage.getStream().getStreamState().get("ordered_col").textValue(), "id"); + assertEquals(finalStateMessage.getStream().getStreamState().get("ordered_col_val").textValue(), "3"); + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlSourceTest.java b/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlSourceTest.java new file mode 100644 index 0000000000000..6daecf68817d9 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlSourceTest.java @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mssql; + +import static org.assertj.core.api.AssertionsForClassTypes.assertThat; +import static org.assertj.core.api.AssertionsForClassTypes.catchThrowable; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.Lists; +import io.airbyte.commons.exceptions.ConfigErrorException; +import io.airbyte.commons.util.MoreIterators; +import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage; +import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadHandler; +import io.airbyte.protocol.models.Field; +import io.airbyte.protocol.models.JsonSchemaType; +import io.airbyte.protocol.models.v0.*; +import java.sql.SQLException; +import java.util.Collections; +import java.util.List; +import org.junit.jupiter.api.*; + +class MssqlSourceTest { + + private static final String STREAM_NAME = "id_and_name"; + private static final AirbyteCatalog CATALOG = new AirbyteCatalog().withStreams(Lists.newArrayList(CatalogHelpers.createAirbyteStream( + STREAM_NAME, + "dbo", + Field.of("id", JsonSchemaType.INTEGER), + Field.of("name", JsonSchemaType.STRING), + Field.of("born", JsonSchemaType.STRING_TIMESTAMP_WITH_TIMEZONE)) + .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + .withSourceDefinedPrimaryKey(List.of(List.of("id"))) + .withIsResumable(true))); + + private MsSQLTestDatabase testdb; + + private MssqlSource source() { + return new MssqlSource(); + } + + // how to interact with the mssql test container manually. + // 1. exec into mssql container (not the test container container) + // 2.
/opt/mssql-tools/bin/sqlcmd -S localhost -U SA -P "A_Str0ng_Required_Password" + @BeforeEach + void setup() { + testdb = MsSQLTestDatabase.in(BaseImage.MSSQL_2022) + .with("CREATE TABLE id_and_name(id INTEGER NOT NULL, name VARCHAR(200), born DATETIMEOFFSET(7));") + .with("INSERT INTO id_and_name (id, name, born) VALUES (1,'picard', '2124-03-04T01:01:01Z'), (2, 'crusher', " + + "'2124-03-04T01:01:01Z'), (3, 'vash', '2124-03-04T01:01:01Z');"); + } + + @AfterEach + void cleanUp() { + testdb.close(); + } + + private JsonNode getConfig() { + return testdb.testConfigBuilder() + .withoutSsl() + .build(); + } + + // if a column in mssql is used as a primary key and in a separate index the discover query returns + // the column twice. we now de-duplicate it (pr: https://github.com/airbytehq/airbyte/pull/983). + // this tests that this de-duplication is successful. + @Test + void testDiscoverWithPk() { + testdb + .with("ALTER TABLE id_and_name ADD CONSTRAINT i3pk PRIMARY KEY CLUSTERED (id);") + .with("CREATE INDEX i1 ON id_and_name (id);"); + final AirbyteCatalog actual = source().discover(getConfig()); + assertEquals(CATALOG, actual); + } + + @Test + void testDiscoverWithoutPk() { + final AirbyteCatalog actual = source().discover(getConfig()); + assertEquals(STREAM_NAME, actual.getStreams().get(0).getName()); + assertEquals(false, actual.getStreams().get(0).getIsResumable()); + } + + @Test + @Disabled("See https://github.com/airbytehq/airbyte/pull/23908#issuecomment-1463753684, enable once communication is out") + public void testTableWithNullCursorValueShouldThrowException() throws Exception { + testdb + .with("ALTER TABLE id_and_name ALTER COLUMN id INTEGER NULL") + .with("INSERT INTO id_and_name(id) VALUES (7), (8), (NULL)"); + + ConfiguredAirbyteStream configuredAirbyteStream = new ConfiguredAirbyteStream().withSyncMode( + SyncMode.INCREMENTAL) + .withCursorField(Lists.newArrayList("id")) + .withDestinationSyncMode(DestinationSyncMode.APPEND) + 
.withSyncMode(SyncMode.INCREMENTAL) + .withStream(CatalogHelpers.createAirbyteStream( + STREAM_NAME, + testdb.getDatabaseName(), + Field.of("id", JsonSchemaType.INTEGER), + Field.of("name", JsonSchemaType.STRING), + Field.of("born", JsonSchemaType.STRING)) + .withSupportedSyncModes( + Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + .withSourceDefinedPrimaryKey(List.of(List.of("id")))); + + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams( + Collections.singletonList(configuredAirbyteStream)); + + final Throwable throwable = catchThrowable(() -> MoreIterators.toSet( + source().read(getConfig(), catalog, null))); + assertThat(throwable).isInstanceOf(ConfigErrorException.class) + .hasMessageContaining( + "The following tables have invalid columns selected as cursor, please select a column with a well-defined ordering with no null values as a cursor. {tableName='dbo.id_and_name', cursorColumnName='id', cursorSqlType=INTEGER, cause=Cursor column contains NULL value}"); + } + + @Test + void testDiscoverWithNonClusteredPk() throws SQLException { + testdb + .with("ALTER TABLE id_and_name ADD CONSTRAINT i3pk PRIMARY KEY NONCLUSTERED (id);") + .with("CREATE INDEX i1 ON id_and_name (id);") + .with("CREATE CLUSTERED INDEX n1 ON id_and_name (name)"); + final AirbyteCatalog actual = source().discover(getConfig()); + assertEquals(CATALOG, actual); + final var db = source().createDatabase(getConfig()); + final String oc = MssqlInitialLoadHandler.discoverClusteredIndexForStream(db, + new AirbyteStream().withName( + actual.getStreams().get(0).getName()).withNamespace(actual.getStreams().get(0).getNamespace())); + assertEquals(oc, "name"); + } + + @Test + void testDiscoverWithNoClusteredIndex() throws SQLException { + testdb + .with("ALTER TABLE id_and_name ADD CONSTRAINT i3pk PRIMARY KEY NONCLUSTERED (id);") + .with("CREATE INDEX i1 ON id_and_name (id);") + .with("CREATE NONCLUSTERED INDEX n1 ON id_and_name (name)"); + final 
AirbyteCatalog actual = source().discover(getConfig()); + assertEquals(CATALOG, actual); + final var db = source().createDatabase(getConfig()); + final String oc = MssqlInitialLoadHandler.discoverClusteredIndexForStream(db, + new AirbyteStream().withName( + actual.getStreams().get(0).getName()).withNamespace(actual.getStreams().get(0).getNamespace())); + assertNull(oc); + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlSslSourceTest.java b/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlSslSourceTest.java new file mode 100644 index 0000000000000..026e18a6a1c87 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlSslSourceTest.java @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql; + +import static org.junit.jupiter.api.Assertions.fail; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.commons.exceptions.ConnectionErrorException; +import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage; +import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.CertificateKey; +import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.ContainerModifier; +import io.airbyte.protocol.models.v0.AirbyteCatalog; +import java.net.InetAddress; +import java.util.Map; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class MssqlSslSourceTest { + + private MsSQLTestDatabase testDb; + private static final Logger LOGGER = LoggerFactory.getLogger(MssqlSslSourceTest.class); + + @BeforeEach + void setup() { + testDb = 
MsSQLTestDatabase.in(BaseImage.MSSQL_2022, ContainerModifier.AGENT, ContainerModifier.WITH_SSL_CERTIFICATES); + } + + @AfterEach + public void tearDown() { + testDb.close(); + } + + @ParameterizedTest + @EnumSource(CertificateKey.class) + public void testDiscoverWithCertificateTrustHostnameWithValidCertificates(CertificateKey certificateKey) throws Exception { + if (!certificateKey.isValid) { + return; + } + String certificate = testDb.getCertificate(certificateKey); + JsonNode config = testDb.testConfigBuilder() + .withSsl(Map.of("ssl_method", "encrypted_verify_certificate", + "certificate", certificate)) + .build(); + AirbyteCatalog catalog = new MssqlSource().discover(config); + } + + @ParameterizedTest + @EnumSource(CertificateKey.class) + public void testDiscoverWithCertificateTrustHostnameWithInvalidCertificates(CertificateKey certificateKey) throws Exception { + if (certificateKey.isValid) { + return; + } + String certificate = testDb.getCertificate(certificateKey); + JsonNode config = testDb.testConfigBuilder() + .withSsl(Map.of("ssl_method", "encrypted_verify_certificate", + "certificate", certificate)) + .build(); + try { + AirbyteCatalog catalog = new MssqlSource().discover(config); + } catch (ConnectionErrorException e) { + if (!e.getCause().getCause().getMessage().contains("PKIX path validation") && + !e.getCause().getCause().getMessage().contains("PKIX path building failed")) { + throw e; + } + } + } + + @ParameterizedTest + @EnumSource(CertificateKey.class) + public void testDiscoverWithCertificateNoTrustHostnameWrongHostname(CertificateKey certificateKey) throws Throwable { + if (!certificateKey.isValid) { + return; + } + String containerIp = InetAddress.getByName(testDb.getContainer().getHost()).getHostAddress(); + String certificate = testDb.getCertificate(certificateKey); + JsonNode config = testDb.configBuilder() + .withSsl(Map.of("ssl_method", "encrypted_verify_certificate", + "certificate", certificate)) + .with(JdbcUtils.HOST_KEY, 
containerIp) + .with(JdbcUtils.PORT_KEY, testDb.getContainer().getFirstMappedPort()) + .withCredentials() + .withDatabase() + .build(); + try { + AirbyteCatalog catalog = new MssqlSource().discover(config); + fail("discover should have failed!"); + } catch (ConnectionErrorException e) { + String expectedMessage = + "Failed to validate the server name \"" + containerIp + "\"in a certificate during Secure Sockets Layer (SSL) initialization."; + if (!e.getExceptionMessage().contains(expectedMessage)) { + fail("exception message was " + e.getExceptionMessage() + "\n expected: " + expectedMessage); + } + } + } + + @ParameterizedTest + @EnumSource(CertificateKey.class) + public void testDiscoverWithCertificateNoTrustHostnameAlternateHostname(CertificateKey certificateKey) throws Exception { + final String containerIp = InetAddress.getByName(testDb.getContainer().getHost()).getHostAddress(); + if (certificateKey.isValid) { + String certificate = testDb.getCertificate(certificateKey); + JsonNode config = testDb.configBuilder() + .withSsl(Map.of("ssl_method", "encrypted_verify_certificate", + "certificate", certificate, + "hostNameInCertificate", testDb.getContainer().getHost())) + .with(JdbcUtils.HOST_KEY, containerIp) + .with(JdbcUtils.PORT_KEY, testDb.getContainer().getFirstMappedPort()) + .withCredentials() + .withDatabase() + .build(); + AirbyteCatalog catalog = new MssqlSource().discover(config); + } + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlStressTest.java b/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlStressTest.java new file mode 100644 index 0000000000000..054b8c3d6f644 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/test/java/io/airbyte/integrations/source/mssql/MssqlStressTest.java @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mssql; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.integrations.source.jdbc.AbstractJdbcSource; +import io.airbyte.cdk.integrations.source.jdbc.test.JdbcStressTest; +import java.sql.JDBCType; +import java.util.Optional; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; + +@Disabled +public class MssqlStressTest extends JdbcStressTest { + + private MsSQLTestDatabase testdb; + + @BeforeEach + public void setup() throws Exception { + testdb = MsSQLTestDatabase.in(MsSQLTestDatabase.BaseImage.MSSQL_2022); + super.setup(); + } + + @Override + public Optional getDefaultSchemaName() { + return Optional.of("dbo"); + } + + @Override + public JsonNode getConfig() { + return testdb.testConfigBuilder().with("is_test", true).build(); + } + + @Override + public AbstractJdbcSource getSource() { + return new MssqlSource(); + } + + @Override + public String getDriverClass() { + return MssqlSource.DRIVER_CLASS; + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLContainerFactory.java b/airbyte-integrations/connectors/source-mssql/src.bak/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLContainerFactory.java new file mode 100644 index 0000000000000..22314da513f95 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLContainerFactory.java @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mssql; + +import io.airbyte.cdk.testutils.ContainerFactory; +import org.apache.commons.lang3.StringUtils; +import org.testcontainers.containers.MSSQLServerContainer; +import org.testcontainers.containers.Network; +import org.testcontainers.utility.DockerImageName; + +public class MsSQLContainerFactory extends ContainerFactory> { + + @Override + protected MSSQLServerContainer createNewContainer(DockerImageName imageName) { + imageName = imageName.asCompatibleSubstituteFor("mcr.microsoft.com/mssql/server"); + var container = new MSSQLServerContainer<>(imageName).acceptLicense(); + container.addEnv("MSSQL_MEMORY_LIMIT_MB", "384"); + withNetwork(container); + return container; + } + + /** + * Create a new network and bind it to the container. + */ + public static void withNetwork(MSSQLServerContainer container) { + container.withNetwork(Network.newNetwork()); + } + + public static void withAgent(MSSQLServerContainer container) { + container.addEnv("MSSQL_AGENT_ENABLED", "True"); + } + + public static void withSslCertificates(MSSQLServerContainer container) { + // yes, this is uglier than sin. The reason why I'm doing this is because there's no command to + // reload a SqlServer config. So I need to create all the necessary files before I start the + // SQL server. 
Hence this horror + String command = StringUtils.replace( + """ + mkdir /tmp/certs/ && + openssl req -nodes -new -x509 -sha256 -keyout /tmp/certs/ca.key -out /tmp/certs/ca.crt -subj "/CN=ca" && + openssl req -nodes -new -x509 -sha256 -keyout /tmp/certs/dummy_ca.key -out /tmp/certs/dummy_ca.crt -subj "/CN=ca" && + openssl req -nodes -new -sha256 -keyout /tmp/certs/server.key -out /tmp/certs/server.csr -subj "/CN={hostName}" && + openssl req -nodes -new -sha256 -keyout /tmp/certs/dummy_server.key -out /tmp/certs/dummy_server.csr -subj "/CN={hostName}" && + + openssl x509 -req -in /tmp/certs/server.csr -CA /tmp/certs/ca.crt -CAkey /tmp/certs/ca.key -out /tmp/certs/server.crt -days 365 -sha256 && + openssl x509 -req -in /tmp/certs/dummy_server.csr -CA /tmp/certs/ca.crt -CAkey /tmp/certs/ca.key -out /tmp/certs/dummy_server.crt -days 365 -sha256 && + openssl x509 -req -in /tmp/certs/server.csr -CA /tmp/certs/dummy_ca.crt -CAkey /tmp/certs/dummy_ca.key -out /tmp/certs/server_dummy_ca.crt -days 365 -sha256 && + chmod 440 /tmp/certs/* && + { + cat > /var/opt/mssql/mssql.conf <<- EOF + [network] + tlscert = /tmp/certs/server.crt + tlskey = /tmp/certs/server.key + tlsprotocols = 1.2 + forceencryption = 1 + EOF + } && /opt/mssql/bin/sqlservr + """, + "{hostName}", container.getHost()); + container.withCommand("bash", "-c", command) + .withUrlParam("trustServerCertificate", "true"); + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLTestDatabase.java b/airbyte-integrations/connectors/source-mssql/src.bak/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLTestDatabase.java new file mode 100644 index 0000000000000..79f27b0355f12 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLTestDatabase.java @@ -0,0 +1,428 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mssql; + +import static io.airbyte.integrations.source.mssql.MsSqlSpecConstants.INVALID_CDC_CURSOR_POSITION_PROPERTY; +import static io.airbyte.integrations.source.mssql.MsSqlSpecConstants.RESYNC_DATA_OPTION; + +import com.google.common.collect.Sets; +import io.airbyte.cdk.db.factory.DatabaseDriver; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.testutils.ContainerFactory.NamedContainerModifier; +import io.airbyte.cdk.testutils.TestDatabase; +import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil; +import io.debezium.connector.sqlserver.Lsn; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.sql.SQLException; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Consumer; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.jooq.SQLDialect; +import org.jooq.exception.DataAccessException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testcontainers.containers.MSSQLServerContainer; + +public class MsSQLTestDatabase extends TestDatabase, MsSQLTestDatabase, MsSQLTestDatabase.MsSQLConfigBuilder> { + + static private final Logger LOGGER = LoggerFactory.getLogger(MsSQLTestDatabase.class); + + // Turning this to true will create a bunch of background threads that will regularly check the + // state of the database and log every time it changes. A bit verbose, but useful for debugging + private static final boolean ENABLE_BACKGROUND_THREADS = false; + + // empirically, 240 is enough. 
If you feel like you need to increase it, you're probably missing a + // check somewhere + static public final int MAX_RETRIES = 240; + + public enum BaseImage { + + MSSQL_2022("mcr.microsoft.com/mssql/server:2022-latest"), + ; + + public final String reference; + + BaseImage(final String reference) { + this.reference = reference; + } + + } + + public enum ContainerModifier implements NamedContainerModifier> { + + AGENT(MsSQLContainerFactory::withAgent), + WITH_SSL_CERTIFICATES(MsSQLContainerFactory::withSslCertificates), + ; + + public final Consumer> modifier; + + ContainerModifier(final Consumer> modifier) { + this.modifier = modifier; + } + + @Override + public Consumer> modifier() { + return modifier; + } + + } + + static public MsSQLTestDatabase in(final BaseImage imageName, final ContainerModifier... modifiers) { + final var container = new MsSQLContainerFactory().shared(imageName.reference, modifiers); + final MsSQLTestDatabase testdb; + if (ENABLE_BACKGROUND_THREADS) { + testdb = new MsSqlTestDatabaseWithBackgroundThreads(container); + } else { + testdb = new MsSQLTestDatabase(container); + } + return testdb + .withConnectionProperty("encrypt", "false") + .withConnectionProperty("trustServerCertificate", "true") + .withConnectionProperty("databaseName", testdb.getDatabaseName()) + .initialized(); + } + + public MsSQLTestDatabase(final MSSQLServerContainer container) { + super(container); + LOGGER.info("creating new database. databaseId=" + this.databaseId + ", databaseName=" + getDatabaseName()); + } + + public MsSQLTestDatabase withCdc() { + LOGGER.info("enabling CDC on database {} with id {}", getDatabaseName(), databaseId); + with("EXEC sys.sp_cdc_enable_db;"); + LOGGER.info("CDC enabled on database {} with id {}", getDatabaseName(), databaseId); + return this; + } + + private static final String RETRYABLE_CDC_TABLE_ENABLEMENT_ERROR_CONTENT = + "The error returned was 14258: 'Cannot perform this operation while SQLServerAgent is starting.
Try again later.'"; + private static final String ENABLE_CDC_SQL_FMT = """ + EXEC sys.sp_cdc_enable_table + \t@source_schema = N'%s', + \t@source_name = N'%s', + \t@role_name = %s, + \t@supports_net_changes = 0, + \t@capture_instance = N'%s'"""; + private final Set CDC_INSTANCE_NAMES = Sets.newConcurrentHashSet(); + + public MsSQLTestDatabase withCdcForTable(String schemaName, String tableName, String roleName) { + return withCdcForTable(schemaName, tableName, roleName, "%s_%s".formatted(schemaName, tableName)); + } + + public MsSQLTestDatabase withCdcForTable(String schemaName, String tableName, String roleName, String instanceName) { + LOGGER.info(formatLogLine("enabling CDC for table {}.{} and role {}, instance {}"), schemaName, tableName, roleName, instanceName); + String sqlRoleName = roleName == null ? "NULL" : "N'%s'".formatted(roleName); + for (int tryCount = 0; tryCount < MAX_RETRIES; tryCount++) { + try { + Thread.sleep(1_000); + synchronized (getContainer()) { + LOGGER.info(formatLogLine("Trying to enable CDC for table {}.{} and role {}, instance {}, try {}/{}"), schemaName, tableName, roleName, + instanceName, tryCount, MAX_RETRIES); + with(ENABLE_CDC_SQL_FMT.formatted(schemaName, tableName, sqlRoleName, instanceName)); + } + CDC_INSTANCE_NAMES.add(instanceName); + return withShortenedCapturePollingInterval(); + } catch (DataAccessException e) { + if (!e.getMessage().contains(RETRYABLE_CDC_TABLE_ENABLEMENT_ERROR_CONTENT)) { + throw e; + } + tryCount++; + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + throw new RuntimeException(formatLogLine("failed to enable CDC for table %s.%s within %d seconds").formatted(schemaName, tableName, MAX_RETRIES)); + } + + private static final String DISABLE_CDC_SQL_FMT = """ + EXEC sys.sp_cdc_disable_table + \t@source_schema = N'%s', + \t@source_name = N'%s', + \t@capture_instance = N'%s' + """; + + public MsSQLTestDatabase withCdcDisabledForTable(String schemaName, String tableName, String 
instanceName) { + LOGGER.info(formatLogLine("disabling CDC for table {}.{}, instance {}"), schemaName, tableName, instanceName); + if (!CDC_INSTANCE_NAMES.remove(instanceName)) { + throw new RuntimeException(formatLogLine("CDC was disabled for instance ") + instanceName); + } + synchronized (getContainer()) { + return with(DISABLE_CDC_SQL_FMT.formatted(schemaName, tableName, instanceName)); + } + } + + private static final String DISABLE_CDC_SQL = "EXEC sys.sp_cdc_disable_db;"; + + public MsSQLTestDatabase withoutCdc() { + CDC_INSTANCE_NAMES.clear(); + synchronized (getContainer()) { + return with(DISABLE_CDC_SQL); + } + } + + public MsSQLTestDatabase withAgentStarted() { + return with("EXEC master.dbo.xp_servicecontrol N'START', N'SQLServerAGENT';"); + } + + public MsSQLTestDatabase withAgentStopped() { + return with("EXEC master.dbo.xp_servicecontrol N'STOP', N'SQLServerAGENT';"); + } + + public MsSQLTestDatabase withWaitUntilAgentRunning() { + waitForAgentState(true); + return self(); + } + + public MsSQLTestDatabase withWaitUntilAgentStopped() { + waitForAgentState(false); + return self(); + } + + public MsSQLTestDatabase waitForCdcRecords(String schemaName, String tableName, int recordCount) { + return waitForCdcRecords(schemaName, tableName, "%s_%s".formatted(schemaName, tableName), recordCount); + } + + public MsSQLTestDatabase waitForCdcRecords(String schemaName, String tableName, String cdcInstanceName, int recordCount) { + if (!CDC_INSTANCE_NAMES.contains(cdcInstanceName)) { + throw new RuntimeException("CDC is not enabled on instance %s".formatted(cdcInstanceName)); + } + String sql = "SELECT count(*) FROM cdc.%s_ct".formatted(cdcInstanceName); + int actualRecordCount = 0; + for (int tryCount = 0; tryCount < MAX_RETRIES; tryCount++) { + LOGGER.info(formatLogLine("fetching the number of CDC records for {}.{}, instance {}"), schemaName, tableName, cdcInstanceName); + try { + Thread.sleep(1_000); + actualRecordCount = query(ctx -> 
ctx.fetch(sql)).get(0).get(0, Integer.class); + } catch (SQLException | DataAccessException e) { + actualRecordCount = 0; + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + LOGGER.info(formatLogLine("Found {} CDC records for {}.{} in instance {}. Expecting {}. Trying again ({}/{}"), actualRecordCount, schemaName, + tableName, cdcInstanceName, + recordCount, tryCount, MAX_RETRIES); + if (actualRecordCount >= recordCount) { + LOGGER.info(formatLogLine("found {} records after {} tries!"), actualRecordCount, tryCount); + return self(); + } + } + throw new RuntimeException(formatLogLine( + "failed to find %d records after %s seconds. Only found %d!").formatted(recordCount, MAX_RETRIES, actualRecordCount)); + } + + private boolean shortenedPollingIntervalEnabled = false; + + public MsSQLTestDatabase withShortenedCapturePollingInterval() { + if (!shortenedPollingIntervalEnabled) { + synchronized (getContainer()) { + shortenedPollingIntervalEnabled = true; + with("EXEC sys.sp_cdc_change_job @job_type = 'capture', @pollinginterval = 1;"); + } + } + return this; + } + + private void waitForAgentState(final boolean running) { + final String expectedValue = running ? "Running." 
: "Stopped."; + LOGGER.info(formatLogLine("Waiting for SQLServerAgent state to change to '{}'."), expectedValue); + for (int i = 0; i < MAX_RETRIES; i++) { + try { + Thread.sleep(1_000); + final var r = query(ctx -> ctx.fetch("EXEC master.dbo.xp_servicecontrol 'QueryState', N'SQLServerAGENT';").get(0)); + if (expectedValue.equalsIgnoreCase(r.getValue(0).toString())) { + LOGGER.info(formatLogLine("SQLServerAgent state is '{}', as expected."), expectedValue); + return; + } + LOGGER.info(formatLogLine("Retrying, SQLServerAgent state {} does not match expected '{}'."), r, expectedValue); + } catch (final SQLException e) { + LOGGER.info(formatLogLine("Retrying agent state query after catching exception {}."), e.getMessage()); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + throw new RuntimeException(formatLogLine("Exhausted retry attempts while polling for agent state")); + } + + public static final String MAX_LSN_QUERY = "SELECT sys.fn_cdc_get_max_lsn();"; + + public MsSQLTestDatabase withWaitUntilMaxLsnAvailable() { + LOGGER.info(formatLogLine("Waiting for max LSN to become available for database {}."), getDatabaseName()); + for (int i = 0; i < MAX_RETRIES; i++) { + try { + Thread.sleep(1_000); + final var maxLSN = query(ctx -> ctx.fetch(MAX_LSN_QUERY).get(0).get(0, byte[].class)); + if (maxLSN != null) { + LOGGER.info(formatLogLine("Max LSN available for database {}: {}"), getDatabaseName(), Lsn.valueOf(maxLSN)); + return self(); + } + LOGGER.info(formatLogLine("Retrying, max LSN still not available for database {}."), getDatabaseName()); + } catch (final SQLException e) { + LOGGER.info(formatLogLine("Retrying max LSN query after catching exception {}"), e.getMessage()); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + throw new RuntimeException("Exhausted retry attempts while polling for max LSN availability"); + } + + @Override + public String getPassword() { + return "S00p3rS33kr3tP4ssw0rd!"; + } + + 
@Override + public String getJdbcUrl() { + return String.format("jdbc:sqlserver://%s:%d", getContainer().getHost(), getContainer().getFirstMappedPort()); + } + + @Override + protected Stream> inContainerBootstrapCmd() { + return Stream.of( + mssqlCmd(Stream.of(String.format("CREATE DATABASE %s", getDatabaseName()))), + mssqlCmd(Stream.of( + String.format("USE %s", getDatabaseName()), + String.format("CREATE LOGIN %s WITH PASSWORD = '%s', DEFAULT_DATABASE = %s", getUserName(), getPassword(), getDatabaseName()), + String.format("ALTER SERVER ROLE [sysadmin] ADD MEMBER %s", getUserName()), + String.format("CREATE USER %s FOR LOGIN %s WITH DEFAULT_SCHEMA = [dbo]", getUserName(), getUserName()), + String.format("ALTER ROLE [db_owner] ADD MEMBER %s", getUserName())))); + } + + /** + * Don't drop anything when closing the test database. Instead, if cleanup is required, call + * {@link #dropDatabaseAndUser()} explicitly. Implicit cleanups may result in deadlocks and so + * aren't really worth it. 
+ */ + @Override + protected Stream inContainerUndoBootstrapCmd() { + return Stream.empty(); + } + + public void dropDatabaseAndUser() { + execInContainer(mssqlCmd(Stream.of( + String.format("USE master"), + String.format("ALTER DATABASE %s SET single_user WITH ROLLBACK IMMEDIATE", getDatabaseName()), + String.format("DROP DATABASE %s", getDatabaseName())))); + } + + public Stream mssqlCmd(final Stream sql) { + return Stream.of("/opt/mssql-tools18/bin/sqlcmd", + "-U", getContainer().getUsername(), + "-P", getContainer().getPassword(), + "-Q", sql.collect(Collectors.joining("; ")), + "-b", "-e", "-C"); + } + + @Override + public DatabaseDriver getDatabaseDriver() { + return DatabaseDriver.MSSQLSERVER; + } + + @Override + public SQLDialect getSqlDialect() { + return SQLDialect.DEFAULT; + } + + public static enum CertificateKey { + + CA(true), + DUMMY_CA(false), + SERVER(true), + DUMMY_SERVER(false), + SERVER_DUMMY_CA(false), + ; + + public final boolean isValid; + + CertificateKey(final boolean isValid) { + this.isValid = isValid; + } + + } + + private volatile Map cachedCerts = new ConcurrentHashMap<>(); + + public String getCertificate(final CertificateKey certificateKey) { + if (!cachedCerts.containsKey(certificateKey)) { + final String certificate; + try { + final String command = "cat /tmp/certs/" + certificateKey.name().toLowerCase() + ".crt"; + certificate = getContainer().execInContainer("bash", "-c", command).getStdout().trim(); + } catch (final IOException e) { + throw new UncheckedIOException(e); + } catch (final InterruptedException e) { + throw new RuntimeException(e); + } + synchronized (cachedCerts) { + this.cachedCerts.put(certificateKey, certificate); + } + } + return cachedCerts.get(certificateKey); + } + + @Override + public MsSQLConfigBuilder configBuilder() { + return new MsSQLConfigBuilder(this); + } + + static public class MsSQLConfigBuilder extends ConfigBuilder { + + protected MsSQLConfigBuilder(final MsSQLTestDatabase testDatabase) { + + 
super(testDatabase); + with(JdbcUtils.JDBC_URL_PARAMS_KEY, "loginTimeout=2"); + + } + + public MsSQLConfigBuilder withCdcReplication() { + return with("is_test", true) + .with("replication_method", Map.of( + "method", "CDC", + "initial_waiting_seconds", DEFAULT_CDC_REPLICATION_INITIAL_WAIT.getSeconds(), + INVALID_CDC_CURSOR_POSITION_PROPERTY, RESYNC_DATA_OPTION)); + } + + public MsSQLConfigBuilder withSchemas(final String... schemas) { + return with(JdbcUtils.SCHEMAS_KEY, List.of(schemas)); + } + + @Override + public MsSQLConfigBuilder withoutSsl() { + return withSsl(Map.of("ssl_method", "unencrypted")); + } + + @Deprecated + public MsSQLConfigBuilder withSsl(final Map sslMode) { + return with("ssl_method", sslMode); + } + + public MsSQLConfigBuilder withEncrytedTrustServerCertificate() { + return withSsl(Map.of("ssl_method", "encrypted_trust_server_certificate")); + } + + public MsSQLConfigBuilder withEncrytedVerifyServerCertificate(final String certificate, final String hostnameInCertificate) { + if (hostnameInCertificate != null) { + return withSsl(Map.of("ssl_method", "encrypted_verify_certificate", + "certificate", certificate, + "hostNameInCertificate", hostnameInCertificate)); + } else { + return withSsl(Map.of("ssl_method", "encrypted_verify_certificate", + "certificate", certificate)); + } + } + + } + + @Override + public void close() { + MssqlDebeziumStateUtil.disposeInitialState(); + super.close(); + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src.bak/testFixtures/java/io/airbyte/integrations/source/mssql/MsSqlTestDatabaseWithBackgroundThreads.java b/airbyte-integrations/connectors/source-mssql/src.bak/testFixtures/java/io/airbyte/integrations/source/mssql/MsSqlTestDatabaseWithBackgroundThreads.java new file mode 100644 index 0000000000000..2d6be3457d518 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src.bak/testFixtures/java/io/airbyte/integrations/source/mssql/MsSqlTestDatabaseWithBackgroundThreads.java @@ -0,0 
+1,306 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql; + +import io.airbyte.commons.logging.LoggingHelper.Color; +import io.airbyte.commons.logging.MdcScope; +import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Base64; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.ConcurrentHashMap; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.exception.ExceptionUtils; +import org.jooq.Record; +import org.jooq.exception.DataAccessException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testcontainers.containers.MSSQLServerContainer; + +public class MsSqlTestDatabaseWithBackgroundThreads extends MsSQLTestDatabase { + + private abstract class AbstractMssqlTestDatabaseBackgroundThread extends Thread { + + protected Logger LOGGER = LoggerFactory.getLogger(this.getClass()); + protected final boolean PRINT_EVERY_CALL = false; + + AbstractMssqlTestDatabaseBackgroundThread() { + this.start(); + } + + protected volatile boolean stop = false; + + protected String formatLogLine(String logLine) { + String retVal = this.getClass().getSimpleName() + " databaseId=" + databaseId + ", containerId=" + containerId + " - " + logLine; + return retVal; + } + + @SuppressWarnings("try") + public void run() { + try (MdcScope mdcScope = new MdcScope.Builder().setPrefixColor(Color.PURPLE_BACKGROUND).setLogPrefix(this.getClass().getSimpleName()) + .build()) { + while (!stop) { + try { + Thread.sleep(100); + innerRun(); + } catch (final Throwable t) { + LOGGER.info(formatLogLine( + "got exception of type " + t.getClass() + ":" + StringUtils.replace(t.getMessage() + "\n" + formatStackTrace(t), "\n", "\\n"))); + } + } + } + } + + private String formatStackTrace(Throwable t) { + boolean 
belowCurrentCall = false; + List stackToDisplay = new LinkedList(); + for (String stackString : ExceptionUtils.getStackFrames(t)) { + if (stackString.startsWith("\tat ")) { + if (!belowCurrentCall && stackString.contains(AbstractMssqlTestDatabaseBackgroundThread.class.getSimpleName())) { + belowCurrentCall = true; + } + } else { + belowCurrentCall = false; + } + if (!belowCurrentCall) { + stackToDisplay.add(stackString); + } + } + return StringUtils.join(stackToDisplay, "\n "); + } + + public abstract void innerRun() throws Exception; + + } + + private class MssqlTestDatabaseBackgroundThreadAgentState extends AbstractMssqlTestDatabaseBackgroundThread { + + private String previousValue = null; + + @Override + public void innerRun() throws Exception { + String agentStateSql = "EXEC master.dbo.xp_servicecontrol 'QueryState', N'SQLServerAGENT';"; + final var r = query(ctx -> ctx.fetch(agentStateSql).get(0)); + String agentState = r.getValue(0).toString(); + if (PRINT_EVERY_CALL || !Objects.equals(agentState, previousValue)) { + LOGGER.info(formatLogLine("agentState changed from {} to {}"), previousValue, agentState); + previousValue = agentState; + } + + } + + } + + private class MssqlTestDatabaseBackgroundThreadFnCdcGetMaxLsn extends AbstractMssqlTestDatabaseBackgroundThread { + + private String previousValue = null; + + @Override + public void innerRun() throws Exception { + String max_lsn; + try { + Object retVal = query(ctx -> ctx.fetch(MAX_LSN_QUERY)).get(0).getValue(0); + if (retVal instanceof byte[] bytes) { + max_lsn = new String(Base64.getEncoder().encode(bytes), StandardCharsets.UTF_8); + } else { + max_lsn = String.valueOf(retVal); + } + } catch (DataAccessException e) { + if (e.getMessage().contains("Invalid object name 'cdc.lsn_time_mapping'")) { + max_lsn = "DataAccessException " + e.getMessage(); + } else { + throw e; + } + } + if (PRINT_EVERY_CALL || !Objects.equals(max_lsn, previousValue)) { + LOGGER.info(formatLogLine("sys.fn_cdc_get_max_lsn changed 
from {} to {}"), previousValue, max_lsn); + previousValue = max_lsn; + } + } + + } + + private class MssqlTestDatabaseBackgroundThreadLsnTimeMapping extends AbstractMssqlTestDatabaseBackgroundThread { + + private String previousValue = null; + private static final String LSN_TIME_MAPPING_QUERY = "SELECT start_lsn, tran_begin_time, tran_end_time, tran_id FROM cdc.lsn_time_mapping;"; + + @Override + public void innerRun() throws Exception { + String results; + try { + results = query(ctx -> ctx.fetch(LSN_TIME_MAPPING_QUERY)).toString(); + } catch (DataAccessException e) { + if (e.getMessage().contains("Invalid object name 'cdc.lsn_time_mapping'")) { + results = "DataAccessException " + e.getMessage(); + } else { + throw e; + } + } + if (PRINT_EVERY_CALL || !Objects.equals(results, previousValue)) { + LOGGER.info(formatLogLine("sys.lsn_time_mapping changed from {} to {}"), previousValue, results); + previousValue = results; + } + } + + } + + private class MssqlTestDatabaseBackgroundThreadQueryJobsTable extends AbstractMssqlTestDatabaseBackgroundThread { + + private String previousValue = null; + private int previousRowCount = -1; + private static final String JOBS_TABLE_QUERY = "SELECT * FROM msdb.dbo.cdc_jobs"; + + @Override + public void innerRun() throws Exception { + int resultSize = 0; + String resultsAsString; + try { + List results = query(ctx -> ctx.fetch(JOBS_TABLE_QUERY)); + resultsAsString = results.toString(); + resultSize = results.size(); + } catch (DataAccessException e) { + if (e.getMessage().contains("Invalid object name 'msdb.dbo.cdc_jobs'")) { + resultsAsString = "DataAccessException " + e.getMessage(); + } else { + throw e; + } + } + if (PRINT_EVERY_CALL || !Objects.equals(resultsAsString, previousValue)) { + LOGGER.info(formatLogLine("cdc.change_tables changed from {} rows\n{} to {} rows\n{}"), previousRowCount, previousValue, resultSize, + resultsAsString); + previousValue = resultsAsString; + previousRowCount = resultSize; + } + } + + } + + 
private class MssqlTestDatabaseBackgroundThreadQueryChangeTables extends AbstractMssqlTestDatabaseBackgroundThread { + + private String previousValue = null; + private int previousRowCount = -1; + private static final String CHANGE_TABLES_QUERY = """ + SELECT OBJECT_SCHEMA_NAME(source_object_id, DB_ID('%s')), + OBJECT_NAME(source_object_id, DB_ID('%s')), + capture_instance, + object_id, + start_lsn FROM cdc.change_tables"""; + + @Override + public void innerRun() throws Exception { + int resultSize = 0; + String resultsAsString; + try { + List results = query(ctx -> ctx.fetch(CHANGE_TABLES_QUERY.formatted(getDatabaseName(), getDatabaseName()))); + resultsAsString = results.toString(); + resultSize = results.size(); + } catch (DataAccessException e) { + if (e.getMessage().contains("Invalid object name 'cdc.change_tables'")) { + resultsAsString = "DataAccessException " + e.getMessage(); + } else { + throw e; + } + } + if (PRINT_EVERY_CALL || !Objects.equals(resultsAsString, previousValue)) { + LOGGER.info(formatLogLine("cdc.change_tables changed from {} rows\n{} to {} rows\n{}"), previousRowCount, previousValue, resultSize, + resultsAsString); + previousValue = resultsAsString; + previousRowCount = resultSize; + } + } + + } + + private class MssqlTestDatabaseBackgroundThreadQueryCdcTable extends AbstractMssqlTestDatabaseBackgroundThread { + + private final String schemaName; + private final String tableName; + private final String instanceName; + private String previousValue = null; + private int previousRowCount = -1; + + MssqlTestDatabaseBackgroundThreadQueryCdcTable(String schemaName, String tableName, String instanceName) { + this.schemaName = schemaName; + this.tableName = tableName; + this.instanceName = instanceName; + } + + private static final String CDC_TABLE_SELECT_QUERY_STRING = "SELECT * FROM cdc.%s_ct"; + + @Override + public void innerRun() throws Exception { + int resultSize = 0; + String resultsAsString; + try { + List results = query(ctx -> 
ctx.fetch(CDC_TABLE_SELECT_QUERY_STRING.formatted(instanceName))); + resultsAsString = results.toString(); + resultSize = results.size(); + } catch (DataAccessException e) { + if (e.getMessage().contains("Invalid object name 'cdc.%s_ct'".formatted(instanceName))) { + resultsAsString = "DataAccessException " + e.getMessage(); + } else { + throw e; + } + } + if (PRINT_EVERY_CALL || !Objects.equals(resultsAsString, previousValue)) { + LOGGER.info(formatLogLine("cdc table {} for {}.{} changed from {} rows\n{} to {} rows\n{}"), instanceName, schemaName, tableName, + previousRowCount, previousValue, resultSize, + resultsAsString); + previousValue = resultsAsString; + previousRowCount = resultSize; + } + } + + } + + private final List bgThreads = new ArrayList<>(); + + MsSqlTestDatabaseWithBackgroundThreads(MSSQLServerContainer container) { + super(container); + + } + + public MsSQLTestDatabase initialized() { + super.initialized(); + bgThreads.add(new MssqlTestDatabaseBackgroundThreadAgentState()); + bgThreads.add(new MssqlTestDatabaseBackgroundThreadFnCdcGetMaxLsn()); + bgThreads.add(new MssqlTestDatabaseBackgroundThreadLsnTimeMapping()); + bgThreads.add(new MssqlTestDatabaseBackgroundThreadQueryChangeTables()); + bgThreads.add(new MssqlTestDatabaseBackgroundThreadQueryJobsTable()); + return self(); + } + + public void close() { + for (var bgThread : bgThreads) { + bgThread.stop = true; + } + super.close(); + MssqlDebeziumStateUtil.disposeInitialState(); + } + + private final Map bgThreadByInstance = new ConcurrentHashMap<>(); + + @Override + public MsSQLTestDatabase withCdcForTable(String schemaName, String tableName, String roleName, String instanceName) { + super.withCdcForTable(schemaName, tableName, roleName, instanceName); + MssqlTestDatabaseBackgroundThreadQueryCdcTable bgThread = new MssqlTestDatabaseBackgroundThreadQueryCdcTable(schemaName, tableName, instanceName); + bgThreadByInstance.put(instanceName, bgThread); + bgThreads.add(bgThread); + return this; + } 
+ + @Override + public MsSQLTestDatabase withCdcDisabledForTable(String schemaName, String tableName, String instanceName) { + bgThreadByInstance.get(instanceName).stop = true; + super.withCdcDisabledForTable(schemaName, tableName, instanceName); + return this; + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlJdbcPartitionFactory.kt b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlJdbcPartitionFactory.kt new file mode 100644 index 0000000000000..85c2e53b19235 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlJdbcPartitionFactory.kt @@ -0,0 +1,356 @@ +/* + * Copyright (c) 2024 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mysql + +import com.fasterxml.jackson.databind.JsonNode +import com.fasterxml.jackson.databind.node.BinaryNode +import io.airbyte.cdk.ConfigErrorException +import io.airbyte.cdk.StreamIdentifier +import io.airbyte.cdk.command.OpaqueStateValue +import io.airbyte.cdk.data.LeafAirbyteSchemaType +import io.airbyte.cdk.data.LocalDateTimeCodec +import io.airbyte.cdk.data.LocalDateTimeCodec.formatter +import io.airbyte.cdk.data.OffsetDateTimeCodec +import io.airbyte.cdk.discover.Field +import io.airbyte.cdk.jdbc.JdbcConnectionFactory +import io.airbyte.cdk.jdbc.JdbcFieldType +import io.airbyte.cdk.read.ConfiguredSyncMode +import io.airbyte.cdk.read.DefaultJdbcSharedState +import io.airbyte.cdk.read.DefaultJdbcStreamState +import io.airbyte.cdk.read.From +import io.airbyte.cdk.read.JdbcPartitionFactory +import io.airbyte.cdk.read.SelectColumnMaxValue +import io.airbyte.cdk.read.SelectQuerySpec +import io.airbyte.cdk.read.Stream +import io.airbyte.cdk.read.StreamFeedBootstrap +import io.airbyte.cdk.util.Jsons +import io.airbyte.integrations.source.mssql.* +import io.micronaut.context.annotation.Primary +import 
java.time.LocalDateTime +import java.time.format.DateTimeFormatter +import java.time.format.DateTimeParseException +import java.util.Base64 +import java.util.concurrent.ConcurrentHashMap +import javax.inject.Singleton + +@Primary +@Singleton +class MsSqlServerJdbcPartitionFactory( + override val sharedState: DefaultJdbcSharedState, + val selectQueryGenerator: MsSqlServerSelectQueryGenerator, + val config: MsSqlServerSourceConfiguration, +) : + JdbcPartitionFactory< + DefaultJdbcSharedState, + DefaultJdbcStreamState, + MsSqlServerJdbcPartition, + > { + + private val streamStates = ConcurrentHashMap() + + override fun streamState(streamFeedBootstrap: StreamFeedBootstrap): DefaultJdbcStreamState = + streamStates.getOrPut(streamFeedBootstrap.feed.id) { + DefaultJdbcStreamState(sharedState, streamFeedBootstrap) + } + + private fun findPkUpperBound(stream: Stream, pkChosenFromCatalog: List): JsonNode { + // find upper bound using maxPk query + val jdbcConnectionFactory = JdbcConnectionFactory(config) + val from = From(stream.name, stream.namespace) + val maxPkQuery = SelectQuerySpec(SelectColumnMaxValue(pkChosenFromCatalog[0]), from) + + jdbcConnectionFactory.get().use { connection -> + val stmt = connection.prepareStatement(selectQueryGenerator.generate(maxPkQuery).sql) + val rs = stmt.executeQuery() + + if (rs.next()) { + val jdbcFieldType = pkChosenFromCatalog[0].type as JdbcFieldType<*> + val pkUpperBound: JsonNode = jdbcFieldType.get(rs, 1) + return pkUpperBound + } else { + // Table might be empty thus there is no max PK value. 
+ return Jsons.nullNode() + } + } + } + + private fun coldStart(streamState: DefaultJdbcStreamState): MsSqlServerJdbcPartition { + val stream: Stream = streamState.stream + val pkChosenFromCatalog: List = stream.configuredPrimaryKey ?: listOf() + + if (stream.configuredSyncMode == ConfiguredSyncMode.FULL_REFRESH) { + if (pkChosenFromCatalog.isEmpty()) { + return MsSqlServerJdbcNonResumableSnapshotPartition( + selectQueryGenerator, + streamState, + ) + } + + val upperBound = findPkUpperBound(stream, pkChosenFromCatalog) + if (sharedState.configuration.global) { + return MsSqlServerJdbcCdcRfrSnapshotPartition( + selectQueryGenerator, + streamState, + pkChosenFromCatalog, + lowerBound = null, + upperBound = listOf(upperBound) + ) + } else { + MsSqlServerJdbcRfrSnapshotPartition( + selectQueryGenerator, + streamState, + pkChosenFromCatalog, + lowerBound = null, + upperBound = listOf(upperBound) + ) + } + } + + if (sharedState.configuration.global) { + return MsSqlServerJdbcCdcSnapshotPartition( + selectQueryGenerator, + streamState, + pkChosenFromCatalog, + lowerBound = null, + ) + } + + val cursorChosenFromCatalog: Field = + stream.configuredCursor as? Field ?: throw ConfigErrorException("no cursor") + + if (pkChosenFromCatalog.isEmpty()) { + return MsSqlServerJdbcNonResumableSnapshotWithCursorPartition( + selectQueryGenerator, + streamState, + cursorChosenFromCatalog + ) + } + return MsSqlServerJdbcSnapshotWithCursorPartition( + selectQueryGenerator, + streamState, + pkChosenFromCatalog, + lowerBound = null, + cursorChosenFromCatalog, + cursorUpperBound = null, + ) + } + + /** + * Flowchart: + * 1. If the input state is null - using coldstart. + * ``` + * a. If it's global but without PK, use non-resumable snapshot. + * b. If it's global with PK, use snapshot. + * c. If it's not global, use snapshot with cursor. + * ``` + * 2. If the input state is not null - + * ``` + * a. If it's in global mode, JdbcPartitionFactory will not handle this. (TODO) + * b. 
If it's cursor based, it could be either in PK read phase (initial read) or + * cursor read phase (incremental read). This is differentiated by the stateType. + * i. In PK read phase, use snapshot with cursor. If no PKs were found, + * use non-resumable snapshot with cursor. + * ii. In cursor read phase, use cursor incremental. + * ``` + */ + override fun create(streamFeedBootstrap: StreamFeedBootstrap): MsSqlServerJdbcPartition? { + val stream: Stream = streamFeedBootstrap.feed + val streamState: DefaultJdbcStreamState = streamState(streamFeedBootstrap) + val opaqueStateValue: OpaqueStateValue = + streamFeedBootstrap.currentState ?: return coldStart(streamState) + + val isCursorBased: Boolean = !sharedState.configuration.global + + val pkChosenFromCatalog: List = stream.configuredPrimaryKey ?: listOf() + + if ( + pkChosenFromCatalog.isEmpty() && + stream.configuredSyncMode == ConfiguredSyncMode.FULL_REFRESH + ) { + if ( + streamState.streamFeedBootstrap.currentState == + MsSqlServerJdbcStreamStateValue.snapshotCompleted + ) { + return null + } + return MsSqlServerJdbcNonResumableSnapshotPartition( + selectQueryGenerator, + streamState, + ) + } + + if (!isCursorBased) { + val sv: MsSqlServerCdcInitialSnapshotStateValue = + Jsons.treeToValue(opaqueStateValue, MsSqlServerCdcInitialSnapshotStateValue::class.java) + + if (stream.configuredSyncMode == ConfiguredSyncMode.FULL_REFRESH) { + val upperBound = findPkUpperBound(stream, pkChosenFromCatalog) + if (sv.pkVal == upperBound.asText()) { + return null + } + val pkLowerBound: JsonNode = stateValueToJsonNode(pkChosenFromCatalog[0], sv.pkVal) + + return MsSqlServerJdbcRfrSnapshotPartition( + selectQueryGenerator, + streamState, + pkChosenFromCatalog, + lowerBound = if (pkLowerBound.isNull) null else listOf(pkLowerBound), + upperBound = listOf(upperBound) + ) + } + + if (sv.pkName == null) { + // This indicates initial snapshot has been completed. CDC snapshot will be handled + // by CDCPartitionFactory. 
+ // Nothing to do here. + return null + } else { + // This branch indicates snapshot is incomplete. We need to resume based on previous + // snapshot state. + val pkField = pkChosenFromCatalog.first() + val pkLowerBound: JsonNode = stateValueToJsonNode(pkField, sv.pkVal) + + if (stream.configuredSyncMode == ConfiguredSyncMode.FULL_REFRESH) { + val upperBound = findPkUpperBound(stream, pkChosenFromCatalog) + if (sv.pkVal == upperBound.asText()) { + return null + } + return MsSqlServerJdbcCdcRfrSnapshotPartition( + selectQueryGenerator, + streamState, + pkChosenFromCatalog, + lowerBound = if (pkLowerBound.isNull) null else listOf(pkLowerBound), + upperBound = listOf(upperBound) + ) + } + return MsSqlServerJdbcCdcSnapshotPartition( + selectQueryGenerator, + streamState, + pkChosenFromCatalog, + lowerBound = listOf(pkLowerBound), + ) + } + } else { + val sv: MsSqlServerJdbcStreamStateValue = + Jsons.treeToValue(opaqueStateValue, MsSqlServerJdbcStreamStateValue::class.java) + + if (stream.configuredSyncMode == ConfiguredSyncMode.FULL_REFRESH) { + val upperBound = findPkUpperBound(stream, pkChosenFromCatalog) + if (sv.pkValue == upperBound.asText()) { + return null + } + val pkLowerBound: JsonNode = + stateValueToJsonNode(pkChosenFromCatalog[0], sv.pkValue) + + return MsSqlServerJdbcCdcRfrSnapshotPartition( + selectQueryGenerator, + streamState, + pkChosenFromCatalog, + lowerBound = if (pkLowerBound.isNull) null else listOf(pkLowerBound), + upperBound = listOf(upperBound) + ) + } + + if (sv.stateType != "cursor_based") { + // Loading value from catalog. Note there could be unexpected behaviors if user + // updates their schema but did not reset their state. + val pkField = pkChosenFromCatalog.first() + val pkLowerBound: JsonNode = stateValueToJsonNode(pkField, sv.pkValue) + + val cursorChosenFromCatalog: Field = + stream.configuredCursor as? Field ?: throw ConfigErrorException("no cursor") + + // in a state where it's still in primary_key read part. 
+ return MsSqlServerJdbcSnapshotWithCursorPartition( + selectQueryGenerator, + streamState, + pkChosenFromCatalog, + lowerBound = listOf(pkLowerBound), + cursorChosenFromCatalog, + cursorUpperBound = null, + ) + } + // resume back to cursor based increment. + val cursor: Field = stream.fields.find { it.id == sv.cursorField.first() } as Field + val cursorCheckpoint: JsonNode = stateValueToJsonNode(cursor, sv.cursors) + + // Compose a jsonnode of cursor label to cursor value to fit in + // DefaultJdbcCursorIncrementalPartition + if (cursorCheckpoint.toString() == streamState.cursorUpperBound?.toString()) { + // Incremental complete. + return null + } + return MsSqlServerJdbcCursorIncrementalPartition( + selectQueryGenerator, + streamState, + cursor, + cursorLowerBound = cursorCheckpoint, + isLowerBoundIncluded = false, + cursorUpperBound = streamState.cursorUpperBound, + ) + } + } + + private fun stateValueToJsonNode(field: Field, stateValue: String?): JsonNode { + when (field.type.airbyteSchemaType) { + is LeafAirbyteSchemaType -> + return when (field.type.airbyteSchemaType as LeafAirbyteSchemaType) { + LeafAirbyteSchemaType.INTEGER -> { + Jsons.valueToTree(stateValue?.toBigInteger()) + } + LeafAirbyteSchemaType.NUMBER -> { + Jsons.valueToTree(stateValue?.toDouble()) + } + LeafAirbyteSchemaType.BINARY -> { + val ba = Base64.getDecoder().decode(stateValue!!) + Jsons.valueToTree(ba) + } + LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE -> { + val timestampInStatePattern = "yyyy-MM-dd'T'HH:mm:ss" + try { + val formatter: DateTimeFormatter = + DateTimeFormatter.ofPattern(timestampInStatePattern) + Jsons.textNode( + LocalDateTime.parse(stateValue, formatter) + .format(LocalDateTimeCodec.formatter) + ) + } catch (e: DateTimeParseException) { + // Resolve to use the new format. 
+ Jsons.valueToTree(stateValue) + } + } + LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE -> { + val timestampInStatePattern = "yyyy-MM-dd'T'HH:mm:ss" + try { + val formatter: DateTimeFormatter = + DateTimeFormatter.ofPattern(timestampInStatePattern) + Jsons.valueToTree( + LocalDateTime.parse(stateValue, formatter) + .minusDays(1) + .atOffset(java.time.ZoneOffset.UTC) + .format(OffsetDateTimeCodec.formatter) + ) + } catch (e: DateTimeParseException) { + // Resolve to use the new format. + Jsons.valueToTree(stateValue) + } + } + else -> Jsons.valueToTree(stateValue) + } + else -> + throw IllegalStateException( + "PK field must be leaf type but is ${field.type.airbyteSchemaType}." + ) + } + } + + override fun split( + unsplitPartition: MsSqlServerJdbcPartition, + opaqueStateValues: List + ): List { + // At this moment we don't support split on within mysql stream in any mode. + return listOf(unsplitPartition) + } +} diff --git a/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerCdcInitialSnapshotStateValue.kt b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerCdcInitialSnapshotStateValue.kt new file mode 100644 index 0000000000000..7cc5722a0fb75 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerCdcInitialSnapshotStateValue.kt @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2024 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql + +import com.fasterxml.jackson.annotation.JsonProperty +import com.fasterxml.jackson.databind.JsonNode +import io.airbyte.cdk.command.OpaqueStateValue +import io.airbyte.cdk.discover.Field +import io.airbyte.cdk.read.Stream +import io.airbyte.cdk.util.Jsons + +data class MsSqlServerCdcInitialSnapshotStateValue( + @JsonProperty("pk_val") val pkVal: String? = null, + @JsonProperty("pk_name") val pkName: String? 
= null, + @JsonProperty("version") val version: Int? = null, + @JsonProperty("state_type") val stateType: String? = null, + @JsonProperty("incremental_state") val incrementalState: JsonNode? = null, + @JsonProperty("stream_name") val streamName: String? = null, + @JsonProperty("cursor_field") val cursorField: List? = null, + @JsonProperty("stream_namespace") val streamNamespace: String? = null, +) { + companion object { + /** Value representing the completion of a FULL_REFRESH snapshot. */ + fun getSnapshotCompletedState(stream: Stream): OpaqueStateValue = + Jsons.valueToTree( + MsSqlServerCdcInitialSnapshotStateValue( + streamName = stream.name, + cursorField = listOf(), + streamNamespace = stream.namespace + ) + ) + + /** Value representing the progress of an ongoing snapshot. */ + fun snapshotCheckpoint( + primaryKey: List, + primaryKeyCheckpoint: List, + ): OpaqueStateValue { + val primaryKeyField = primaryKey.first() + return Jsons.valueToTree( + MsSqlServerCdcInitialSnapshotStateValue( + pkName = primaryKeyField.id, + pkVal = primaryKeyCheckpoint.first().asText(), + stateType = "primary_key", + ) + ) + } + } +} diff --git a/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerDebeziumOperations.kt b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerDebeziumOperations.kt new file mode 100644 index 0000000000000..0c9fcaa15463d --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerDebeziumOperations.kt @@ -0,0 +1,47 @@ +package io.airbyte.integrations.source.mssql + +import com.fasterxml.jackson.databind.JsonNode +import io.airbyte.cdk.command.OpaqueStateValue +import io.airbyte.cdk.read.Stream +import io.airbyte.cdk.read.cdc.* +import io.airbyte.cdk.util.Jsons +import jakarta.inject.Singleton +import org.apache.kafka.connect.source.SourceRecord + +@Singleton +class 
MsSqlServerDebeziumOperations:DebeziumOperations { + override fun position(offset: DebeziumOffset): MsSqlServerDebeziumPosition { + return MsSqlServerDebeziumPosition() + } + + override fun position(recordValue: DebeziumRecordValue): MsSqlServerDebeziumPosition? { + return MsSqlServerDebeziumPosition() + } + + override fun position(sourceRecord: SourceRecord): MsSqlServerDebeziumPosition? { + return MsSqlServerDebeziumPosition() + } + + override fun synthesize(): DebeziumInput { + return DebeziumInput(isSynthetic = true, state = DebeziumState(DebeziumOffset(emptyMap()), null), properties = emptyMap()) + } + + override fun deserialize(opaqueStateValue: OpaqueStateValue, streams: List): DebeziumInput { + return DebeziumInput(isSynthetic = true, state = DebeziumState(DebeziumOffset(emptyMap()), null), properties = emptyMap()) } + + override fun deserialize(key: DebeziumRecordKey, value: DebeziumRecordValue): DeserializedRecord? { + return null + } + + override fun serialize(debeziumState: DebeziumState): OpaqueStateValue { + return Jsons.objectNode() + } +} + +class MsSqlServerDebeziumPosition: Comparable { + override fun compareTo(other: MsSqlServerDebeziumPosition): Int { + return 0 + } + +} + diff --git a/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerFieldTypeMapper.kt b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerFieldTypeMapper.kt new file mode 100644 index 0000000000000..9949d5c3eb8ca --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerFieldTypeMapper.kt @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mssql + +import io.airbyte.cdk.discover.FieldType +import io.airbyte.cdk.discover.JdbcMetadataQuerier +import io.airbyte.cdk.discover.SystemType +import io.airbyte.cdk.jdbc.* +import io.github.oshai.kotlinlogging.KotlinLogging +import io.micronaut.context.annotation.Primary +import jakarta.inject.Singleton +import java.sql.JDBCType + +private val log = KotlinLogging.logger {} +@Singleton +@Primary +class MsSqlServerFieldTypeMapper : JdbcMetadataQuerier.FieldTypeMapper { + override fun toFieldType(c: JdbcMetadataQuerier.ColumnMetadata): FieldType { + when (val type = c.type) { + is SystemType -> { + val retVal = leafType(type) + return retVal + } + else -> { + return PokemonFieldType + } + } + } + + private fun leafType(type: SystemType): JdbcFieldType<*> { + return MsSqlServerSqlType.fromName(type.typeName)?.jdbcType ?: + when(type.jdbcType) { + JDBCType.BIT -> BooleanFieldType + JDBCType.TINYINT -> ShortFieldType + JDBCType.SMALLINT -> ShortFieldType + JDBCType.INTEGER -> IntFieldType + JDBCType.BIGINT -> BigIntegerFieldType + JDBCType.FLOAT -> FloatFieldType + JDBCType.REAL -> DoubleFieldType + JDBCType.DOUBLE -> DoubleFieldType + JDBCType.NUMERIC -> DoubleFieldType + JDBCType.DECIMAL -> BigIntegerFieldType + JDBCType.CHAR -> StringFieldType + JDBCType.VARCHAR -> StringFieldType + JDBCType.LONGVARCHAR -> StringFieldType + JDBCType.DATE -> LocalDateFieldType + JDBCType.TIME -> LocalTimeFieldType + JDBCType.TIMESTAMP -> LocalDateTimeFieldType + JDBCType.BINARY -> BytesFieldType + JDBCType.VARBINARY -> BytesFieldType + JDBCType.LONGVARBINARY -> BytesFieldType + JDBCType.NULL -> NullFieldType + JDBCType.OTHER -> PokemonFieldType + JDBCType.JAVA_OBJECT -> PokemonFieldType + JDBCType.DISTINCT -> PokemonFieldType + JDBCType.STRUCT -> PokemonFieldType + JDBCType.ARRAY -> PokemonFieldType + JDBCType.BLOB -> BinaryStreamFieldType + JDBCType.CLOB -> CharacterStreamFieldType + JDBCType.REF -> PokemonFieldType + JDBCType.DATALINK 
-> PokemonFieldType + JDBCType.BOOLEAN -> BooleanFieldType + JDBCType.ROWID -> PokemonFieldType + JDBCType.NCHAR -> StringFieldType + JDBCType.NVARCHAR -> StringFieldType + JDBCType.LONGNVARCHAR -> StringFieldType + JDBCType.NCLOB -> CharacterStreamFieldType + JDBCType.SQLXML -> PokemonFieldType + JDBCType.REF_CURSOR -> PokemonFieldType + JDBCType.TIME_WITH_TIMEZONE -> OffsetTimeFieldType + JDBCType.TIMESTAMP_WITH_TIMEZONE -> OffsetDateTimeFieldType + null -> PokemonFieldType + } + } + + enum class MsSqlServerSqlType(val names: List, val jdbcType: JdbcFieldType<*>) { + BINARY(BinaryStreamFieldType, "VARBINARY", "BINARY"), + DATETIME_TYPES(LocalDateTimeFieldType, "DATETIME", "DATETIME2", "SMALLDATETIME"), + DATE(LocalDateFieldType, "DATE"), + DATETIMEOFFSET(OffsetDateTimeFieldType, "DATETIMEOFFSET"), + TIME_TYPE(LocalTimeFieldType, "TIME"), + SMALLMONEY_TYPE(PokemonFieldType, "SMALLMONEY"), + GEOMETRY(PokemonFieldType, "GEOMETRY"), + GEOGRAPHY(PokemonFieldType, "GEOGRAPHY"); + + constructor(jdbcType: JdbcFieldType<*>, vararg names: String) : this(names.toList(), jdbcType) { + } + + companion object { + private val nameToValue = MsSqlServerSqlType.entries.flatMap { + msSqlServerSqlType -> msSqlServerSqlType.names.map { + name -> name to msSqlServerSqlType + } + }.toMap() + + fun fromName(name: String?): MsSqlServerSqlType? 
{ + val retVal = nameToValue[name] + return retVal + } + } + } + + companion object { + val DATETIME_FORMAT_MICROSECONDS = "yyyy-MM-dd'T'HH:mm:ss[.][SSSSSS]" + } +} diff --git a/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerJdbcPartition.kt b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerJdbcPartition.kt new file mode 100644 index 0000000000000..1b3d9562cd342 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerJdbcPartition.kt @@ -0,0 +1,351 @@ +/* + * Copyright (c) 2024 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql + +import com.fasterxml.jackson.databind.JsonNode +import com.fasterxml.jackson.databind.node.ObjectNode +import io.airbyte.cdk.command.OpaqueStateValue +import io.airbyte.cdk.discover.Field +import io.airbyte.cdk.read.And +import io.airbyte.cdk.read.DefaultJdbcStreamState +import io.airbyte.cdk.read.Equal +import io.airbyte.cdk.read.From +import io.airbyte.cdk.read.FromSample +import io.airbyte.cdk.read.Greater +import io.airbyte.cdk.read.GreaterOrEqual +import io.airbyte.cdk.read.JdbcCursorPartition +import io.airbyte.cdk.read.JdbcPartition +import io.airbyte.cdk.read.JdbcSplittablePartition +import io.airbyte.cdk.read.Lesser +import io.airbyte.cdk.read.LesserOrEqual +import io.airbyte.cdk.read.Limit +import io.airbyte.cdk.read.NoWhere +import io.airbyte.cdk.read.Or +import io.airbyte.cdk.read.OrderBy +import io.airbyte.cdk.read.SelectColumnMaxValue +import io.airbyte.cdk.read.SelectColumns +import io.airbyte.cdk.read.SelectQuery +import io.airbyte.cdk.read.SelectQueryGenerator +import io.airbyte.cdk.read.SelectQuerySpec +import io.airbyte.cdk.read.Stream +import io.airbyte.cdk.read.Where +import io.airbyte.cdk.read.WhereClauseLeafNode +import io.airbyte.cdk.read.WhereClauseNode +import 
io.airbyte.cdk.read.optimize +import io.airbyte.cdk.util.Jsons + +/** Base class for default implementations of [JdbcPartition] for non resumable partitions. */ +sealed class MsSqlServerJdbcPartition( + val selectQueryGenerator: SelectQueryGenerator, + streamState: DefaultJdbcStreamState, +) : JdbcPartition { + val stream: Stream = streamState.stream + val from = From(stream.name, stream.namespace) + + override val nonResumableQuery: SelectQuery + get() = selectQueryGenerator.generate(nonResumableQuerySpec.optimize()) + + open val nonResumableQuerySpec = SelectQuerySpec(SelectColumns(stream.fields), from) + + override fun samplingQuery(sampleRateInvPow2: Int): SelectQuery { + val sampleSize: Int = streamState.sharedState.maxSampleSize + val querySpec = + SelectQuerySpec( + SelectColumns(stream.fields), + From(stream.name, stream.namespace), + limit = Limit(sampleSize.toLong()), + ) + return selectQueryGenerator.generate(querySpec.optimize()) + } +} + +/** Default implementation of a [JdbcPartition] for an unsplittable snapshot partition. */ +class MsSqlServerJdbcNonResumableSnapshotPartition( + selectQueryGenerator: SelectQueryGenerator, + override val streamState: DefaultJdbcStreamState, +) : MsSqlServerJdbcPartition(selectQueryGenerator, streamState) { + + override val completeState: OpaqueStateValue = MsSqlServerJdbcStreamStateValue.snapshotCompleted +} + +/** + * Default implementation of a [JdbcPartition] for an non resumable snapshot partition preceding a + * cursor-based incremental sync. 
+ */ +class MsSqlServerJdbcNonResumableSnapshotWithCursorPartition( + selectQueryGenerator: SelectQueryGenerator, + override val streamState: DefaultJdbcStreamState, + val cursor: Field, +) : + MsSqlServerJdbcPartition(selectQueryGenerator, streamState), + JdbcCursorPartition { + + override val completeState: OpaqueStateValue + get() = + MsSqlServerJdbcStreamStateValue.cursorIncrementalCheckpoint( + cursor, + cursorCheckpoint = streamState.cursorUpperBound!!, + streamState.stream, + ) + + override val cursorUpperBoundQuery: SelectQuery + get() = selectQueryGenerator.generate(cursorUpperBoundQuerySpec.optimize()) + + val cursorUpperBoundQuerySpec = SelectQuerySpec(SelectColumnMaxValue(cursor), from) +} + +/** Base class for default implementations of [JdbcPartition] for partitions. */ +sealed class MsSqlServerJdbcResumablePartition( + selectQueryGenerator: SelectQueryGenerator, + streamState: DefaultJdbcStreamState, + val checkpointColumns: List, +) : + MsSqlServerJdbcPartition(selectQueryGenerator, streamState), + JdbcSplittablePartition { + abstract val lowerBound: List? + abstract val upperBound: List? 
+ + override val nonResumableQuery: SelectQuery + get() = selectQueryGenerator.generate(nonResumableQuerySpec.optimize()) + + override val nonResumableQuerySpec: SelectQuerySpec + get() = SelectQuerySpec(SelectColumns(stream.fields), from, where) + + override fun resumableQuery(limit: Long): SelectQuery { + val querySpec = + SelectQuerySpec( + SelectColumns((stream.fields + checkpointColumns).distinct()), + from, + where, + OrderBy(checkpointColumns), + Limit(limit), + ) + return selectQueryGenerator.generate(querySpec.optimize()) + } + + override fun samplingQuery(sampleRateInvPow2: Int): SelectQuery { + val sampleSize: Int = streamState.sharedState.maxSampleSize + val querySpec = + SelectQuerySpec( + SelectColumns(stream.fields + checkpointColumns), + FromSample(stream.name, stream.namespace, sampleRateInvPow2, sampleSize), + NoWhere, + OrderBy(checkpointColumns), + Limit(sampleSize.toLong()) + ) + return selectQueryGenerator.generate(querySpec.optimize()) + } + + val where: Where + get() { + val zippedLowerBound: List> = + lowerBound?.let { checkpointColumns.zip(it) } ?: listOf() + val lowerBoundDisj: List = + zippedLowerBound.mapIndexed { idx: Int, (gtCol: Field, gtValue: JsonNode) -> + val lastLeaf: WhereClauseLeafNode = + if (isLowerBoundIncluded && idx == checkpointColumns.size - 1) { + GreaterOrEqual(gtCol, gtValue) + } else { + Greater(gtCol, gtValue) + } + And( + zippedLowerBound.take(idx).map { (eqCol: Field, eqValue: JsonNode) -> + Equal(eqCol, eqValue) + } + listOf(lastLeaf), + ) + } + val zippedUpperBound: List> = + upperBound?.let { checkpointColumns.zip(it) } ?: listOf() + val upperBoundDisj: List = + zippedUpperBound.mapIndexed { idx: Int, (leqCol: Field, leqValue: JsonNode) -> + val lastLeaf: WhereClauseLeafNode = + if (idx < zippedUpperBound.size - 1) { + Lesser(leqCol, leqValue) + } else { + LesserOrEqual(leqCol, leqValue) + } + And( + zippedUpperBound.take(idx).map { (eqCol: Field, eqValue: JsonNode) -> + Equal(eqCol, eqValue) + } + 
listOf(lastLeaf), + ) + } + return Where(And(Or(lowerBoundDisj), Or(upperBoundDisj))) + } + + open val isLowerBoundIncluded: Boolean = false +} + +/** RFR for cursor based read. */ +class MsSqlServerJdbcRfrSnapshotPartition( + selectQueryGenerator: SelectQueryGenerator, + override val streamState: DefaultJdbcStreamState, + primaryKey: List, + override val lowerBound: List?, + override val upperBound: List?, +) : MsSqlServerJdbcResumablePartition(selectQueryGenerator, streamState, primaryKey) { + + // TODO: this needs to reflect lastRecord. Complete state needs to have last primary key value + // in RFR case. + override val completeState: OpaqueStateValue + get() = + MsSqlServerJdbcStreamStateValue.snapshotCheckpoint( + primaryKey = checkpointColumns, + primaryKeyCheckpoint = + checkpointColumns.map { upperBound?.get(0) ?: Jsons.nullNode() }, + ) + + override fun incompleteState(lastRecord: ObjectNode): OpaqueStateValue = + MsSqlServerJdbcStreamStateValue.snapshotCheckpoint( + primaryKey = checkpointColumns, + primaryKeyCheckpoint = checkpointColumns.map { lastRecord[it.id] ?: Jsons.nullNode() }, + ) +} + +/** RFR for CDC. 
*/ +class MsSqlServerJdbcCdcRfrSnapshotPartition( + selectQueryGenerator: SelectQueryGenerator, + override val streamState: DefaultJdbcStreamState, + primaryKey: List, + override val lowerBound: List?, + override val upperBound: List?, +) : MsSqlServerJdbcResumablePartition(selectQueryGenerator, streamState, primaryKey) { + + override val completeState: OpaqueStateValue + get() = + MsSqlServerCdcInitialSnapshotStateValue.snapshotCheckpoint( + primaryKey = checkpointColumns, + primaryKeyCheckpoint = + checkpointColumns.map { upperBound?.get(0) ?: Jsons.nullNode() }, + ) + + override fun incompleteState(lastRecord: ObjectNode): OpaqueStateValue = + MsSqlServerCdcInitialSnapshotStateValue.snapshotCheckpoint( + primaryKey = checkpointColumns, + primaryKeyCheckpoint = checkpointColumns.map { lastRecord[it.id] ?: Jsons.nullNode() }, + ) +} + +/** + * Implementation of a [JdbcPartition] for a CDC snapshot partition. Used for incremental CDC + * initial sync. + */ +class MsSqlServerJdbcCdcSnapshotPartition( + selectQueryGenerator: SelectQueryGenerator, + override val streamState: DefaultJdbcStreamState, + primaryKey: List, + override val lowerBound: List? +) : MsSqlServerJdbcResumablePartition(selectQueryGenerator, streamState, primaryKey) { + override val upperBound: List? = null + override val completeState: OpaqueStateValue + get() = MsSqlServerCdcInitialSnapshotStateValue.getSnapshotCompletedState(stream) + + override fun incompleteState(lastRecord: ObjectNode): OpaqueStateValue = + MsSqlServerCdcInitialSnapshotStateValue.snapshotCheckpoint( + primaryKey = checkpointColumns, + primaryKeyCheckpoint = checkpointColumns.map { lastRecord[it.id] ?: Jsons.nullNode() }, + ) +} + +/** + * Default implementation of a [JdbcPartition] for a splittable partition involving cursor columns. 
+ */ +sealed class MsSqlServerJdbcCursorPartition( + selectQueryGenerator: SelectQueryGenerator, + streamState: DefaultJdbcStreamState, + checkpointColumns: List, + val cursor: Field, + private val explicitCursorUpperBound: JsonNode?, +) : + MsSqlServerJdbcResumablePartition(selectQueryGenerator, streamState, checkpointColumns), + JdbcCursorPartition { + + val cursorUpperBound: JsonNode + get() = explicitCursorUpperBound ?: streamState.cursorUpperBound!! + + override val cursorUpperBoundQuery: SelectQuery + get() = selectQueryGenerator.generate(cursorUpperBoundQuerySpec.optimize()) + + val cursorUpperBoundQuerySpec = SelectQuerySpec(SelectColumnMaxValue(cursor), from) +} + +/** + * Default implementation of a [JdbcPartition] for a splittable snapshot partition preceding a + * cursor-based incremental sync. + */ +class MsSqlServerJdbcSnapshotWithCursorPartition( + selectQueryGenerator: SelectQueryGenerator, + override val streamState: DefaultJdbcStreamState, + primaryKey: List, + override val lowerBound: List?, + cursor: Field, + cursorUpperBound: JsonNode?, +) : + MsSqlServerJdbcCursorPartition( + selectQueryGenerator, + streamState, + primaryKey, + cursor, + cursorUpperBound + ) { + // UpperBound is not used because the partition is not splittable. + override val upperBound: List? = null + + override val completeState: OpaqueStateValue + get() = + MsSqlServerJdbcStreamStateValue.cursorIncrementalCheckpoint( + cursor, + cursorUpperBound, + stream, + ) + + override fun incompleteState(lastRecord: ObjectNode): OpaqueStateValue = + MsSqlServerJdbcStreamStateValue.snapshotWithCursorCheckpoint( + primaryKey = checkpointColumns, + primaryKeyCheckpoint = checkpointColumns.map { lastRecord[it.id] ?: Jsons.nullNode() }, + cursor, + stream, + ) +} + +/** + * Default implementation of a [JdbcPartition] for a cursor incremental partition. These are always + * splittable. 
+ */ +class MsSqlServerJdbcCursorIncrementalPartition( + selectQueryGenerator: SelectQueryGenerator, + override val streamState: DefaultJdbcStreamState, + cursor: Field, + val cursorLowerBound: JsonNode, + override val isLowerBoundIncluded: Boolean, + cursorUpperBound: JsonNode?, +) : + MsSqlServerJdbcCursorPartition( + selectQueryGenerator, + streamState, + listOf(cursor), + cursor, + cursorUpperBound + ) { + + override val lowerBound: List = listOf(cursorLowerBound) + override val upperBound: List + get() = listOf(cursorUpperBound) + + override val completeState: OpaqueStateValue + get() = + MsSqlServerJdbcStreamStateValue.cursorIncrementalCheckpoint( + cursor, + cursorCheckpoint = cursorUpperBound, + stream, + ) + + override fun incompleteState(lastRecord: ObjectNode): OpaqueStateValue = + MsSqlServerJdbcStreamStateValue.cursorIncrementalCheckpoint( + cursor, + cursorCheckpoint = lastRecord[cursor.id] ?: Jsons.nullNode(), + stream, + ) +} diff --git a/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerJdbcStreamStateValue.kt b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerJdbcStreamStateValue.kt new file mode 100644 index 0000000000000..48a4a9535d760 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerJdbcStreamStateValue.kt @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mssql + +import com.fasterxml.jackson.annotation.JsonProperty +import com.fasterxml.jackson.databind.JsonNode +import io.airbyte.cdk.command.OpaqueStateValue +import io.airbyte.cdk.discover.Field +import io.airbyte.cdk.read.Stream +import io.airbyte.cdk.util.Jsons + +data class MsSqlServerJdbcStreamStateValue( + @JsonProperty("cursor") val cursors: String = "", + @JsonProperty("version") val version: Int = 2, + @JsonProperty("state_type") val stateType: String = StateType.CURSOR_BASED.stateType, + @JsonProperty("stream_name") val streamName: String = "", + @JsonProperty("cursor_field") val cursorField: List = listOf(), + @JsonProperty("stream_namespace") val streamNamespace: String = "", + @JsonProperty("cursor_record_count") val cursorRecordCount: Int = 0, + @JsonProperty("pk_name") val pkName: String? = null, + @JsonProperty("pk_val") val pkValue: String? = null, + @JsonProperty("incremental_state") val incrementalState: JsonNode? = null, +) { + companion object { + /** Value representing the completion of a FULL_REFRESH snapshot. */ + val snapshotCompleted: OpaqueStateValue + get() = Jsons.valueToTree(MsSqlServerJdbcStreamStateValue(stateType = "primary_key")) + + /** Value representing the progress of an ongoing incremental cursor read. */ + fun cursorIncrementalCheckpoint( + cursor: Field, + cursorCheckpoint: JsonNode, + stream: Stream, + ): OpaqueStateValue { + return Jsons.valueToTree( + MsSqlServerJdbcStreamStateValue( + cursorField = listOf(cursor.id), + cursors = cursorCheckpoint.asText(), + streamName = stream.name, + streamNamespace = stream.namespace!! + ) + ) + } + + /** Value representing the progress of an ongoing snapshot not involving cursor columns. 
*/ + fun snapshotCheckpoint( + primaryKey: List, + primaryKeyCheckpoint: List, + ): OpaqueStateValue { + val primaryKeyField = primaryKey.first() + return Jsons.valueToTree( + MsSqlServerJdbcStreamStateValue( + pkName = primaryKeyField.id, + pkValue = primaryKeyCheckpoint.first().asText(), + stateType = StateType.PRIMARY_KEY.stateType, + ) + ) + } + + /** Value representing the progress of an ongoing snapshot involving cursor columns. */ + fun snapshotWithCursorCheckpoint( + primaryKey: List, + primaryKeyCheckpoint: List, + cursor: Field, + stream: Stream + ): OpaqueStateValue { + val primaryKeyField = primaryKey.first() + return Jsons.valueToTree( + MsSqlServerJdbcStreamStateValue( + pkName = primaryKeyField.id, + pkValue = primaryKeyCheckpoint.first().asText(), + stateType = StateType.PRIMARY_KEY.stateType, + incrementalState = + Jsons.valueToTree( + MsSqlServerJdbcStreamStateValue( + cursorField = listOf(cursor.id), + streamName = stream.name, + streamNamespace = stream.namespace!! + ) + ), + ) + ) + } + } +} + +enum class StateType(val stateType: String) { + PRIMARY_KEY("primary_key"), + CURSOR_BASED("cursor_based"), +} diff --git a/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSelectQueryGenerator.kt b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSelectQueryGenerator.kt new file mode 100644 index 0000000000000..3ab35aba2ca83 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSelectQueryGenerator.kt @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mssql + +import io.airbyte.cdk.discover.Field +import io.airbyte.cdk.jdbc.LongFieldType +import io.airbyte.cdk.jdbc.LosslessJdbcFieldType +import io.airbyte.cdk.read.* +import io.airbyte.cdk.util.Jsons +import io.micronaut.context.annotation.Primary +import jakarta.inject.Singleton + +@Singleton +@Primary +class MsSqlServerSelectQueryGenerator : SelectQueryGenerator { + override fun generate(ast: SelectQuerySpec): SelectQuery = + SelectQuery(ast.sql(), ast.select.columns, ast.bindings()) + + fun SelectQuerySpec.sql(): String { + val components: List = listOf(sql(select, limit), from.sql(), where.sql(), orderBy.sql()) + val sql: String = components.filter { it.isNotBlank() }.joinToString(" ") + return sql + } + + fun sql(selectNode: SelectNode, limit: LimitNode): String { + val topClause: String = + when (limit) { + NoLimit -> "" + Limit(0) -> "TOP 0 " + is Limit -> "TOP ${limit.n} " + } + return "SELECT $topClause" + + when (selectNode) { + is SelectColumns -> selectNode.columns.joinToString(", ") { it.sql() } + is SelectColumnMaxValue -> "MAX(${selectNode.column.sql()})" + } + } + + fun Field.sql(): String = "$id" + + fun FromNode.sql(): String = + when (this) { + NoFrom -> "" + is From -> if (this.namespace == null) "FROM $name" else "FROM $namespace.$name" + is FromSample -> { + val from: String = From(name, namespace).sql() + // On a table that is very big we limit sampling to no less than 0.05% + // chance of a row getting picked. This comes at a price of bias to the beginning + // of table on very large tables ( > 100s million of rows) + val greatestRate: String = 0.00005.toString() + // Quick approximation to "select count(*) from table" which doesn't require + // full table scan. 
+ val quickCount = + "SELECT table_rows FROM information_schema.tables WHERE table_schema = '$namespace' AND table_name = '$name'" + val greatest = "GREATEST($greatestRate, $sampleSize / ($quickCount))" + // Rand returns a value between 0 and 1 + val where = "WHERE RAND() < $greatest " + "$from $where" + } + } + + fun WhereNode.sql(): String = + when (this) { + NoWhere -> "" + is Where -> "WHERE ${clause.sql()}" + } + + fun WhereClauseNode.sql(): String = + when (this) { + is And -> conj.joinToString(") AND (", "(", ")") { it.sql() } + is Or -> disj.joinToString(") OR (", "(", ")") { it.sql() } + is Equal -> "${column.sql()} = ?" + is Greater -> "${column.sql()} > ?" + is GreaterOrEqual -> "${column.sql()} >= ?" + is LesserOrEqual -> "${column.sql()} <= ?" + is Lesser -> "${column.sql()} < ?" + } + + fun OrderByNode.sql(): String = + when (this) { + NoOrderBy -> "" + is OrderBy -> "ORDER BY " + columns.joinToString(", ") { it.sql() } + } + + fun SelectQuerySpec.bindings(): List = where.bindings() + limit.bindings() + + fun WhereNode.bindings(): List = + when (this) { + is NoWhere -> listOf() + is Where -> clause.bindings() + } + + fun WhereClauseNode.bindings(): List = + when (this) { + is And -> conj.flatMap { it.bindings() } + is Or -> disj.flatMap { it.bindings() } + is WhereClauseLeafNode -> { + val type = column.type as LosslessJdbcFieldType<*, *> + listOf(SelectQuery.Binding(bindingValue, type)) + } + } + + fun LimitNode.bindings(): List = + when (this) { + NoLimit, + Limit(0), + is Limit -> emptyList() + } +} diff --git a/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSource.kt b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSource.kt new file mode 100644 index 0000000000000..fc7250cfc09d4 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSource.kt @@ -0,0 
+1,18 @@ +/* + * Copyright (c) 2024 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql + +import io.airbyte.cdk.AirbyteSourceRunner +import io.github.oshai.kotlinlogging.KotlinLogging + +object MsSqlServerSource { + private val log = KotlinLogging.logger {} + + @JvmStatic + fun main(args: Array<String>) { + log.info { "MS SQL Server source starting with arguments: ${args.toList()}" } + AirbyteSourceRunner.run(*args) + } +} diff --git a/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceConfiguration.kt b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceConfiguration.kt new file mode 100644 index 0000000000000..2119fbd69b6af --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceConfiguration.kt @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2024 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql + +import io.airbyte.cdk.command.* +import io.airbyte.cdk.ssh.SshConnectionOptions +import io.airbyte.cdk.ssh.SshNoTunnelMethod +import io.airbyte.cdk.ssh.SshTunnelMethodConfiguration +import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerCdcReplicationConfigurationSpecification +import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerCursorBasedReplicationConfigurationSpecification +import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerSourceConfigurationSpecification +import io.github.oshai.kotlinlogging.KotlinLogging +import io.micronaut.context.annotation.Factory +import jakarta.inject.Inject +import jakarta.inject.Singleton +import java.time.Duration + +sealed interface MsSqlServerIncrementalReplicationConfiguration + +data object MsSqlServerCursorBasedIncrementalReplicationConfiguration : MsSqlServerIncrementalReplicationConfiguration + +data class
MsSqlServerCdcIncrementalReplicationConfiguration( + var initialWaitingSeconds: Int +) : MsSqlServerIncrementalReplicationConfiguration + +class MsSqlServerSourceConfiguration( + override val realHost: String, + override val realPort: Int, + override val sshTunnel: SshTunnelMethodConfiguration?, + override val sshConnectionOptions: SshConnectionOptions, + override val global: Boolean, + override val maxSnapshotReadDuration: Duration?, + override val checkpointTargetInterval: Duration, + override val maxConcurrency: Int, + override val resourceAcquisitionHeartbeat: Duration, + override val debeziumHeartbeatInterval: Duration, + override val jdbcUrlFmt: String, + override val jdbcProperties: Map, + override val namespaces: Set, + val incrementalReplicationConfiguration: MsSqlServerIncrementalReplicationConfiguration, +) : JdbcSourceConfiguration, CdcSourceConfiguration { +} + +@Singleton +class MsSqlServerSourceConfigurationFactory +@Inject +constructor(val featureFlags: Set) : + SourceConfigurationFactory< + MsSqlServerSourceConfigurationSpecification, MsSqlServerSourceConfiguration> { + + constructor() : this(emptySet()) + + override fun makeWithoutExceptionHandling( + pojo: MsSqlServerSourceConfigurationSpecification, + ): MsSqlServerSourceConfiguration { + val replicationMethodPojo = pojo.replicationMethodJson + val incrementalReplicationConfiguration = when (replicationMethodPojo) { + is MsSqlServerCdcReplicationConfigurationSpecification -> MsSqlServerCdcIncrementalReplicationConfiguration( + initialWaitingSeconds = replicationMethodPojo.initialWaitingSeconds ?: MsSqlServerCdcReplicationConfigurationSpecification.DEFAULT_INITIAL_WAITING_SECONDS + ) + is MsSqlServerCursorBasedReplicationConfigurationSpecification -> MsSqlServerCursorBasedIncrementalReplicationConfiguration + null -> TODO() + } + return MsSqlServerSourceConfiguration( + realHost = pojo.host, + realPort = pojo.port, + sshTunnel = SshNoTunnelMethod, + sshConnectionOptions = 
SshConnectionOptions.fromAdditionalProperties(emptyMap()), + global = incrementalReplicationConfiguration is MsSqlServerCdcIncrementalReplicationConfiguration, + maxSnapshotReadDuration = null, + checkpointTargetInterval = Duration.ofHours(1), + jdbcUrlFmt = "jdbc:sqlserver://%s:%d;databaseName=${pojo.database}", + namespaces = pojo.schemas?.toSet()?: setOf(), + jdbcProperties = + mapOf("encrypt" to "false", "user" to pojo.username, "password" to pojo.password), + maxConcurrency = 10, + debeziumHeartbeatInterval = Duration.ofSeconds(15), + resourceAcquisitionHeartbeat = Duration.ofSeconds(15), + incrementalReplicationConfiguration = incrementalReplicationConfiguration + ) + } + + /** Required to inject [MsSqlServerSourceConfiguration] directly. */ + @Factory + private class MicronautFactory { + @Singleton + fun msSqlServerSourceConfig( + factory: + SourceConfigurationFactory< + MsSqlServerSourceConfigurationSpecification, MsSqlServerSourceConfiguration>, + supplier: ConfigurationSpecificationSupplier<MsSqlServerSourceConfigurationSpecification>, + ): MsSqlServerSourceConfiguration = factory.make(supplier.get()) + } +} diff --git a/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerStreamFactory.kt b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerStreamFactory.kt new file mode 100644 index 0000000000000..4fafb5abcf478 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerStreamFactory.kt @@ -0,0 +1,21 @@ +package io.airbyte.integrations.source.mssql + +import com.fasterxml.jackson.databind.node.ObjectNode +import io.airbyte.cdk.command.OpaqueStateValue +import io.airbyte.cdk.discover.JdbcAirbyteStreamFactory +import io.airbyte.cdk.discover.MetaField +import io.airbyte.cdk.read.Stream +import io.micronaut.context.annotation.Primary +import jakarta.inject.Singleton +import java.time.OffsetDateTime + +@Singleton +@Primary +class
MsSqlServerStreamFactory: JdbcAirbyteStreamFactory { + override val globalCursor: MetaField? = null + override val globalMetaFields: Set = emptySet() + + override fun decorateRecordData(timestamp: OffsetDateTime, globalStateValue: OpaqueStateValue?, stream: Stream, recordData: ObjectNode) { + // do nothing + } +} diff --git a/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/config_spec/MsSqlServerEncryptionConfigurationSpecification.kt b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/config_spec/MsSqlServerEncryptionConfigurationSpecification.kt new file mode 100644 index 0000000000000..76746e2bf7eda --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/config_spec/MsSqlServerEncryptionConfigurationSpecification.kt @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2024 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql.config_spec + +import com.fasterxml.jackson.annotation.* +import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaDescription +import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaInject +import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings +import io.airbyte.cdk.ConfigErrorException + +@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "ssl_method") +@JsonSubTypes( + JsonSubTypes.Type(value = MsSqlServerEncryptionDisabledConfigurationSpecification::class, name = "unencrypted"), + JsonSubTypes.Type( + value = MsSqlServerEncryptionRequiredTrustServerCertificateConfigurationSpecification::class, + name = "encrypted_trust_server_certificate" + ), + JsonSubTypes.Type(value = SslVerifyCertificate::class, name = "encrypted_verify_certificate"), +) +@JsonSchemaTitle("Encryption") +@JsonSchemaDescription("The encryption method which is used when communicating with the database.") 
+sealed interface MsSqlServerEncryptionConfigurationSpecification + +@JsonSchemaTitle("Unencrypted") +@JsonSchemaDescription( + "Data transfer will not be encrypted.", +) +data object MsSqlServerEncryptionDisabledConfigurationSpecification : MsSqlServerEncryptionConfigurationSpecification + +@JsonSchemaTitle("Encrypted (trust server certificate)") +@JsonSchemaDescription( + "Use the certificate provided by the server without verification. (For testing purposes only!)" +) +data object MsSqlServerEncryptionRequiredTrustServerCertificateConfigurationSpecification : + MsSqlServerEncryptionConfigurationSpecification + +@JsonSchemaTitle("Encrypted (verify certificate)") +@JsonSchemaDescription("Verify and use the certificate provided by the server.") +@SuppressFBWarnings(value = ["NP_NONNULL_RETURN_VIOLATION"], justification = "Micronaut DI") +class SslVerifyCertificate : MsSqlServerEncryptionConfigurationSpecification { + @JsonProperty("hostNameInCertificate") + @JsonSchemaTitle("Host Name In Certificate") + @JsonPropertyDescription( + "Specifies the host name of the server. The value of this property must match the subject property of the certificate.", + ) + @JsonSchemaInject(json = """{"order":0}""") + var hostNameInCertificate: String? = null + + @JsonProperty("certificate", required = false) + @JsonSchemaTitle("Certificate") + @JsonPropertyDescription( + "certificate of the server, or of the CA that signed the server certificate", + ) + @JsonSchemaInject(json = """{"order":1,"airbyte_secret":true,"multiline":true}""") + var certificate: String? = null +} + +class MicronautPropertiesFriendlyMsSqlServerEncryption { + var mode: String = "preferred" + var certificate: String? 
= null + + @JsonValue + fun asEncryption(): MsSqlServerEncryptionConfigurationSpecification = + when (mode) { + "preferred" -> MsSqlServerEncryptionDisabledConfigurationSpecification + "required" -> MsSqlServerEncryptionRequiredTrustServerCertificateConfigurationSpecification + "verify_ca" -> SslVerifyCertificate().also { it.certificate = certificate!! } + else -> throw ConfigErrorException("invalid value $mode") + } +} diff --git a/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/config_spec/MsSqlServerReplicationMethodConfigurationSpecification.kt b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/config_spec/MsSqlServerReplicationMethodConfigurationSpecification.kt new file mode 100644 index 0000000000000..d72b78be7a986 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/config_spec/MsSqlServerReplicationMethodConfigurationSpecification.kt @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mssql.config_spec + +import com.fasterxml.jackson.annotation.* +import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaDescription +import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaInject +import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings +import io.airbyte.cdk.ConfigErrorException +import io.github.oshai.kotlinlogging.KotlinLogging + +@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "method") +@JsonSubTypes( + JsonSubTypes.Type(value = MsSqlServerCdcReplicationConfigurationSpecification::class, name = "CDC"), + JsonSubTypes.Type(value = MsSqlServerCursorBasedReplicationConfigurationSpecification::class, name = "STANDARD"), +) +sealed interface MsSqlServerReplicationMethodConfigurationSpecification + +@JsonSchemaTitle("Read Changes using Change Data Capture (CDC)") +@JsonSchemaDescription( + "Recommended - Incrementally reads new inserts, updates, and deletes using the SQL Server's " + + "" + + "change data capture feature. This must be enabled on your database." +) +@SuppressFBWarnings(value = ["NP_NONNULL_RETURN_VIOLATION"], justification = "Micronaut DI") +class MsSqlServerCdcReplicationConfigurationSpecification : MsSqlServerReplicationMethodConfigurationSpecification { + @JsonProperty("initial_waiting_seconds") + @JsonSchemaTitle("Initial Waiting Time in Seconds (Advanced)") + @JsonPropertyDescription( + "The amount of time the connector will wait when it launches to determine if there is new data to sync or not. " + + "Defaults to 300 seconds. Valid range: 120 seconds to 3600 seconds.", + ) + @JsonSchemaInject(json = """{"order":1, "min":120, "max":3600, "default":300}""") + var initialWaitingSeconds: Int? 
= DEFAULT_INITIAL_WAITING_SECONDS + + @JsonProperty("invalid_cdc_cursor_position_behavior") + @JsonSchemaTitle("Invalid CDC position behavior (Advanced)") + @JsonPropertyDescription( + "Determines whether Airbyte should fail or re-sync data in case of an stale/invalid cursor value into the WAL. " + + "If 'Fail sync' is chosen, a user will have to manually reset the connection before being able to continue syncing data. " + + "If 'Re-sync data' is chosen, Airbyte will automatically trigger a refresh but could lead to higher cloud costs and data loss.", + ) + @JsonSchemaInject( + json = """{"order":2,"enum": ["Fail sync", "Re-sync data"], "default": "Fail sync"}""" + ) + var invalidCdcCursorPositionBehavior: String? = "Fail sync" + + @JsonProperty("queue_size") + @JsonSchemaTitle("Size of the queue (Advanced)") + @JsonPropertyDescription( + "The size of the internal queue. This may interfere with memory consumption and efficiency of the connector, please be careful.", + ) + @JsonSchemaInject(json = """{"order":3, "min":1000, "max":10000, "default": 10000}""") + var queueSize: Int? = 10000 + + @JsonProperty("initial_load_timeout_hours") + @JsonSchemaTitle("Initial Load Timeout in Hours (Advanced)") + @JsonPropertyDescription( + "The amount of time an initial load is allowed to continue for before catching up on CDC logs.", + ) + @JsonSchemaInject(json = """{"order":4, "min":4, "max":24, "default": 8}""") + var initialLoadTimeoutHours: Int? = 8 + + companion object { + const val DEFAULT_INITIAL_WAITING_SECONDS = 300 + } +} + +@JsonSchemaTitle("Scan Changes with User Defined Cursor") +@JsonSchemaDescription( + "Incrementally detects new inserts and updates using the " + + "" + + "cursor column chosen when configuring a connection (e.g. created_at, updated_at)." 
+) + +class MsSqlServerCursorBasedReplicationConfigurationSpecification : MsSqlServerReplicationMethodConfigurationSpecification { +} + +class MsSqlServerMicronautPropertiesFriendlyMsSqlServerReplicationMethodConfiguration { + val method: String = "CDC" + @JsonValue + fun asReplicationMethod(): MsSqlServerReplicationMethodConfigurationSpecification = + when (method) { + "CDC" -> MsSqlServerCdcReplicationConfigurationSpecification() + "STANDARD" -> MsSqlServerCursorBasedReplicationConfigurationSpecification() + else -> throw ConfigErrorException("invalid value $method") + } +} diff --git a/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/config_spec/MsSqlServerSourceConfigurationSpecification.kt b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/config_spec/MsSqlServerSourceConfigurationSpecification.kt new file mode 100644 index 0000000000000..fe396f1261c06 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/config_spec/MsSqlServerSourceConfigurationSpecification.kt @@ -0,0 +1,119 @@ +/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
*/ +package io.airbyte.integrations.source.mssql.config_spec + +import com.fasterxml.jackson.annotation.JsonGetter +import com.fasterxml.jackson.annotation.JsonIgnore +import com.fasterxml.jackson.annotation.JsonProperty +import com.fasterxml.jackson.annotation.JsonPropertyDescription +import com.fasterxml.jackson.annotation.JsonPropertyOrder +import com.fasterxml.jackson.annotation.JsonSetter +import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaDefault +import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaInject +import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings +import io.airbyte.cdk.command.CONNECTOR_CONFIG_PREFIX +import io.airbyte.cdk.command.ConfigurationSpecification +import io.micronaut.context.annotation.ConfigurationBuilder +import io.micronaut.context.annotation.ConfigurationProperties +import jakarta.inject.Singleton + +/** + * The object which is mapped to the MS SQL Server source configuration JSON. + * + * Use [MysqlSourceConfiguration] instead wherever possible. This object also allows injecting + * values through Micronaut properties, this is made possible by the classes named + * `MicronautPropertiesFriendly.*`. 
+ */ +@JsonSchemaTitle("MSSQL Source Spec") +@JsonPropertyOrder( + value = ["host", "port", "database", "schemas", "username", "password"], +) +@Singleton +@ConfigurationProperties(CONNECTOR_CONFIG_PREFIX) +@SuppressFBWarnings(value = ["NP_NONNULL_RETURN_VIOLATION"], justification = "Micronaut DI") +class MsSqlServerSourceConfigurationSpecification : ConfigurationSpecification() { + @JsonProperty("host") + @JsonSchemaTitle("Host") + @JsonSchemaInject(json = """{"order":0}""") + @JsonPropertyDescription("The hostname of the database.") + lateinit var host: String + + @JsonProperty("port") + @JsonSchemaTitle("Port") + @JsonSchemaInject(json = """{"order":1,"minimum": 0,"maximum": 65536, "examples":["1433"]}""") + @JsonSchemaDefault("3306") + @JsonPropertyDescription( + "The port of the database.", + ) + var port: Int = 3306 + + @JsonProperty("database") + @JsonSchemaTitle("Database") + @JsonPropertyDescription("The name of the database.") + @JsonSchemaInject(json = """{"order":2, "examples":["master"]}""") + lateinit var database: String + + @JsonProperty("schemas") + @JsonSchemaTitle("Schemas") + @JsonPropertyDescription("The list of schemas to sync from. Defaults to user. Case sensitive.") + // @DefaultSchemaDefault doesn't seem to work for array types... + @JsonSchemaInject(json = """{"order":3, "default":["dbo"], "minItems":0, "uniqueItems":true}""") + var schemas: Array? 
= arrayOf("dbo") + + @JsonProperty("username") + @JsonSchemaTitle("Username") + @JsonPropertyDescription("The username which is used to access the database.") + @JsonSchemaInject(json = """{"order":4}""") + lateinit var username: String + + @JsonProperty("password") + @JsonSchemaTitle("Password") + @JsonPropertyDescription("The password associated with the username.") + @JsonSchemaInject(json = """{"order":5,"airbyte_secret":true}""") + lateinit var password: String + + @JsonProperty("jdbc_url_params") + @JsonSchemaTitle("JDBC URL Params") + @JsonPropertyDescription( + "Additional properties to pass to the JDBC URL string when connecting to the database " + + "formatted as 'key=value' pairs separated by the symbol '&'. " + + "(example: key1=value1&key2=value2&key3=value3).", + ) + @JsonSchemaInject(json = """{"order":6}""") + var jdbcUrlParams: String? = null + + @JsonIgnore + @ConfigurationBuilder(configurationPrefix = "ssl_method") + var encryption = MicronautPropertiesFriendlyMsSqlServerEncryption() + @JsonIgnore var encryptionJson: MsSqlServerEncryptionConfigurationSpecification? = null + @JsonSetter("ssl_method") + fun setEncryptionValue(value: MsSqlServerEncryptionConfigurationSpecification) { + encryptionJson = value + } + @JsonGetter("ssl_method") + @JsonSchemaTitle("SSL Method") + @JsonPropertyDescription( + "The encryption method which is used when communicating with the database.", + ) + @JsonSchemaInject(json = """{"order":7}""") + fun getEncryptionValue(): MsSqlServerEncryptionConfigurationSpecification? = + encryptionJson ?: encryption.asEncryption() + + @JsonIgnore + @ConfigurationBuilder(configurationPrefix = "method") + var replicationMethod = MsSqlServerMicronautPropertiesFriendlyMsSqlServerReplicationMethodConfiguration() + @JsonIgnore + var replicationMethodJson: MsSqlServerReplicationMethodConfigurationSpecification? 
= null + @JsonSetter("replication_method") + fun setReplicationMethodValue(value: MsSqlServerReplicationMethodConfigurationSpecification) { + replicationMethodJson = value + } + @JsonGetter("replication_method") + @JsonSchemaTitle("Update Method") + @JsonPropertyDescription( + "Configures how data is extracted from the database.", + ) + @JsonSchemaInject(json = """{"order":8, "default":"CDC", "display_type": "radio"}""") + fun getReplicationMethodValue(): MsSqlServerReplicationMethodConfigurationSpecification? = + replicationMethodJson ?: replicationMethod.asReplicationMethod() +} diff --git a/airbyte-integrations/connectors/source-mssql/src/main/resources/application.yml b/airbyte-integrations/connectors/source-mssql/src/main/resources/application.yml new file mode 100644 index 0000000000000..b9bc7b0d3d246 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/main/resources/application.yml @@ -0,0 +1,12 @@ +--- +airbyte: + connector: + extract: + jdbc: + mode: sequential + namespace-kind: SCHEMA + check: + jdbc: + queries: + - >- + SELECT 1 where 1 = 0; diff --git a/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerContainerFactory.kt b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerContainerFactory.kt new file mode 100644 index 0000000000000..e03b9da645dbc --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerContainerFactory.kt @@ -0,0 +1,123 @@ +/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
*/ +package io.airbyte.integrations.source.mssql + +import io.airbyte.cdk.jdbc.JdbcConnectionFactory +import io.airbyte.cdk.testcontainers.TestContainerFactory +import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerCursorBasedReplicationConfigurationSpecification +import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerReplicationMethodConfigurationSpecification +import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerSourceConfigurationSpecification +import io.github.oshai.kotlinlogging.KotlinLogging +import org.apache.commons.lang3.RandomStringUtils +import org.apache.commons.lang3.StringUtils +import org.testcontainers.containers.Container +import org.testcontainers.containers.MSSQLServerContainer +import org.testcontainers.containers.Network +import org.testcontainers.utility.DockerImageName +import java.sql.Statement +import kotlin.random.Random + +enum class MsSqlServerImage(val imageName: String) { + SQLSERVER_2022("mcr.microsoft.com/mssql/server:2022-latest") + ; +} + +class MsSqlServercontainer(val realContainer: MSSQLServerContainer<*>){ + val schemaName = "schema_"+RandomStringUtils.insecure().nextAlphabetic(16); +} + +object MsSqlServerContainerFactory { + const val COMPATIBLE_NAME = "mcr.microsoft.com/mssql/server" + + + init { + TestContainerFactory.register(COMPATIBLE_NAME, ::MSSQLServerContainer) + } + + sealed interface MysqlContainerModifier : + TestContainerFactory.ContainerModifier> + + data object WithNetwork : MysqlContainerModifier { + override fun modify(container: MSSQLServerContainer<*>) { + container.withNetwork(Network.newNetwork()) + } + } + + data object WithCdcOff : MysqlContainerModifier { + override fun modify(container: MSSQLServerContainer<*>) { + container.withCommand("--skip-log-bin") + } + } + + fun exclusive( + image: MsSqlServerImage, + vararg modifiers: MysqlContainerModifier, + ): MsSqlServercontainer { + val dockerImageName = + 
DockerImageName.parse(image.imageName).asCompatibleSubstituteFor(COMPATIBLE_NAME) + return MsSqlServercontainer(TestContainerFactory.exclusive(dockerImageName, *modifiers)) + } + + fun shared( + image: MsSqlServerImage, + vararg modifiers: MysqlContainerModifier, + ): MsSqlServercontainer { + val dockerImageName = + DockerImageName.parse(image.imageName).asCompatibleSubstituteFor(COMPATIBLE_NAME) + return MsSqlServercontainer(TestContainerFactory.shared(dockerImageName, *modifiers)) + } + + @JvmStatic + fun config( + msSQLContainer: MsSqlServercontainer + ): MsSqlServerSourceConfigurationSpecification { + val schemaName = msSQLContainer.schemaName + val config = MsSqlServerSourceConfigurationSpecification().apply { + host = msSQLContainer.realContainer.host + port = msSQLContainer.realContainer.getMappedPort(MSSQLServerContainer.MS_SQL_SERVER_PORT) + username = msSQLContainer.realContainer.username + password = msSQLContainer.realContainer.password + jdbcUrlParams = "" + database = "master" + schemas = arrayOf(schemaName) + replicationMethodJson = MsSqlServerCursorBasedReplicationConfigurationSpecification() + } + JdbcConnectionFactory(MsSqlServerSourceConfigurationFactory().make(config)).get().use {connection -> + connection.isReadOnly = false + connection.createStatement().use { stmt: Statement -> + stmt.execute("CREATE SCHEMA $schemaName") + } + connection.createStatement().use { stmt: Statement -> + stmt.execute("CREATE TABLE $schemaName.name_and_born(name VARCHAR(200), born DATETIMEOFFSET(7));") + stmt.execute("CREATE TABLE $schemaName.id_name_and_born(id INTEGER PRIMARY KEY, name VARCHAR(200), born DATETIMEOFFSET(7));") + } + connection.createStatement().use { stmt: Statement -> + stmt.execute("INSERT INTO $schemaName.name_and_born (name, born) VALUES ('foo', '2022-03-21 15:43:15.45'), ('bar', '2022-10-22 01:02:03.04')") + stmt.execute("INSERT INTO $schemaName.id_name_and_born (id, name, born) VALUES (1, 'foo', '2022-03-21 15:43:15.45'), (2, 'bar', 
'2022-10-22 01:02:03.04')") + } + } + return config + } + + @JvmStatic + fun cdcConfig( + msSQLContainer: MSSQLServerContainer<*> + ): MsSqlServerSourceConfigurationSpecification = + MsSqlServerSourceConfigurationSpecification().apply { + host = msSQLContainer.host + port = msSQLContainer.getMappedPort(MSSQLServerContainer.MS_SQL_SERVER_PORT) + username = msSQLContainer.username + password = msSQLContainer.password + jdbcUrlParams = "" + database = "dbo" + } + + fun MSSQLServerContainer<*>.execAsRoot(sql: String) { + val cleanSql: String = sql.trim().removeSuffix(";") + ";" + val result: Container.ExecResult = + execInContainer("mysql", "-u", "root", "-ptest", "-e", cleanSql) + if (result.exitCode == 0) { + return + } + throw RuntimeException("Failed to execute query $cleanSql") + } +} diff --git a/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerCursorBasedIntegrationTest.kt b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerCursorBasedIntegrationTest.kt new file mode 100644 index 0000000000000..5bd3d0a338544 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerCursorBasedIntegrationTest.kt @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mssql + +import io.airbyte.cdk.StreamIdentifier +import io.airbyte.cdk.command.CliRunner +import io.airbyte.cdk.discover.DiscoveredStream +import io.airbyte.cdk.discover.Field +import io.airbyte.cdk.jdbc.IntFieldType +import io.airbyte.cdk.jdbc.JdbcConnectionFactory +import io.airbyte.cdk.jdbc.StringFieldType +import io.airbyte.cdk.output.BufferingOutputConsumer +import io.airbyte.cdk.util.Jsons +import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerSourceConfigurationSpecification +import io.airbyte.protocol.models.v0.AirbyteRecordMessage +import io.airbyte.protocol.models.v0.AirbyteStateMessage +import io.airbyte.protocol.models.v0.AirbyteStream +import io.airbyte.protocol.models.v0.CatalogHelpers +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog +import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream +import io.airbyte.protocol.models.v0.StreamDescriptor +import io.airbyte.protocol.models.v0.SyncMode +import io.github.oshai.kotlinlogging.KotlinLogging +import java.sql.Connection +import java.sql.Statement +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.BeforeAll +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.Timeout +import org.testcontainers.containers.MSSQLServerContainer +import kotlin.test.assertEquals + +class MsSqlServerCursorBasedIntegrationTest { + + @Test + fun testCursorBasedRead() { + val run1: BufferingOutputConsumer = + CliRunner.source("read", config, getConfiguredCatalog()).run() + + val lastStateMessageFromRun1 = run1.states().last() + val lastStreamStateFromRun1 = lastStateMessageFromRun1.stream.streamState + println("SGX lastStreamStateFromRun1=$lastStreamStateFromRun1") + + assertEquals("20", lastStreamStateFromRun1.get("cursor").textValue()) + assertEquals(2, lastStreamStateFromRun1.get("version").intValue()) + assertEquals("cursor_based", lastStreamStateFromRun1.get("state_type").asText()) + 
assertEquals(tableName, lastStreamStateFromRun1.get("stream_name").asText()) + assertEquals(listOf("k"), lastStreamStateFromRun1.get("cursor_field").map { it.asText() }) + assertEquals(dbContainer.schemaName, lastStreamStateFromRun1.get("stream_namespace").asText()) + assertEquals(0, lastStreamStateFromRun1.get("cursor_record_count").asInt()) + + connectionFactory.get().use { connection: Connection -> + connection.isReadOnly = false + connection.createStatement().use { stmt: Statement -> + stmt.execute("INSERT INTO ${dbContainer.schemaName}.$tableName (k, v) VALUES (3, 'baz-ignore')") + stmt.execute("INSERT INTO ${dbContainer.schemaName}.$tableName (k, v) VALUES (13, 'baz-ignore')") + stmt.execute("INSERT INTO ${dbContainer.schemaName}.$tableName (k, v) VALUES (30, 'baz')") + } + } + + val run2InputState: List = listOf(lastStateMessageFromRun1) + val run2: BufferingOutputConsumer = + CliRunner.source("read", config, getConfiguredCatalog(), run2InputState).run() + val recordMessageFromRun2: List = run2.records() + assertEquals(recordMessageFromRun2.size, 1) + } + + @Test + fun testWithV1State() { + var state: AirbyteStateMessage = Jsons.readValue(V1_STATE, AirbyteStateMessage::class.java) + val run1: BufferingOutputConsumer = + CliRunner.source("read", config, getConfiguredCatalog(), listOf(state)).run() + val recordMessagesFromRun1: List = run1.records() + assertEquals(actual=recordMessagesFromRun1.size, expected=1, message = recordMessagesFromRun1.toString()) + } + + @Test + fun testWithFullRefresh() { + val fullRefreshCatalog = + getConfiguredCatalog().apply { streams[0].syncMode = SyncMode.FULL_REFRESH } + val run1: BufferingOutputConsumer = + CliRunner.source("read", config, fullRefreshCatalog).run() + val recordMessageFromRun1: List = run1.records() + assertEquals(3, recordMessageFromRun1.size, recordMessageFromRun1.toString()) + val lastStateMessageFromRun1 = run1.states().last() + + val run2: BufferingOutputConsumer = + CliRunner.source("read", config, 
fullRefreshCatalog, listOf(lastStateMessageFromRun1)) + .run() + val recordMessageFromRun2: List = run2.records() + assertEquals(recordMessageFromRun2.size, 0) + } + + companion object { + val log = KotlinLogging.logger {} + val dbContainer: MsSqlServercontainer = MsSqlServerContainerFactory.shared(MsSqlServerImage.SQLSERVER_2022) + + val config: MsSqlServerSourceConfigurationSpecification = + MsSqlServerContainerFactory.config(dbContainer) + + val connectionFactory: JdbcConnectionFactory by lazy { + JdbcConnectionFactory(MsSqlServerSourceConfigurationFactory().make(config)) + } + + fun getConfiguredCatalog(): ConfiguredAirbyteCatalog { + val desc = StreamDescriptor().withName(tableName).withNamespace(dbContainer.schemaName) + val discoveredStream = + DiscoveredStream( + id = StreamIdentifier.Companion.from(desc), + columns = listOf(Field("k", IntFieldType), Field("v", StringFieldType)), + primaryKeyColumnIDs = listOf(listOf("k")), + ) + val stream: AirbyteStream = MsSqlServerStreamFactory().createGlobal(discoveredStream) + val configuredStream: ConfiguredAirbyteStream = + CatalogHelpers.toDefaultConfiguredStream(stream) + .withSyncMode(SyncMode.INCREMENTAL) + .withPrimaryKey(discoveredStream.primaryKeyColumnIDs) + .withCursorField(listOf("k")) + return ConfiguredAirbyteCatalog().withStreams(listOf(configuredStream)) + } + + @JvmStatic + @BeforeAll + @Timeout(value = 300) + fun startAndProvisionTestContainer() { + provisionTestContainer(connectionFactory) + } + + lateinit var tableName: String + + fun provisionTestContainer(targetConnectionFactory: JdbcConnectionFactory) { + tableName = (1..8).map { ('a'..'z').random() }.joinToString("") + + targetConnectionFactory.get().use { connection: Connection -> + connection.isReadOnly = false + connection.createStatement().use { stmt: Statement -> + stmt.execute("CREATE TABLE ${dbContainer.schemaName}.$tableName(k INT PRIMARY KEY, v VARCHAR(80))") + } + connection.createStatement().use { stmt: Statement -> + stmt.execute( + 
"INSERT INTO ${dbContainer.schemaName}.$tableName (k, v) VALUES (5, 'abc'), (10, 'foo'), (20, 'bar')" + ) + } + } + } + } + val V1_STATE: String = + """ + { + "type": "STREAM", + "stream": { + "stream_descriptor": { + "name": "${tableName}", + "namespace": "${dbContainer.schemaName}" + }, + "stream_state": { + "cursor": "10", + "version": 2, + "state_type": "cursor_based", + "stream_name": "${tableName}", + "cursor_field": [ + "k" + ], + "stream_namespace": "${dbContainer.schemaName}", + "cursor_record_count": 1 + } + } + } + """ +} diff --git a/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceConfigurationSpecificationTest.kt b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceConfigurationSpecificationTest.kt new file mode 100644 index 0000000000000..d15bb872522e3 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceConfigurationSpecificationTest.kt @@ -0,0 +1,72 @@ +/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
*/ +package io.airbyte.integrations.source.mssql + +import io.airbyte.cdk.ConfigErrorException +import io.airbyte.cdk.command.ConfigurationSpecificationSupplier +import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerEncryptionConfigurationSpecification +import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerEncryptionDisabledConfigurationSpecification +import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerSourceConfigurationSpecification +import io.micronaut.context.annotation.Property +import io.micronaut.context.env.Environment +import io.micronaut.test.extensions.junit5.annotation.MicronautTest +import jakarta.inject.Inject +import org.junit.jupiter.api.Assertions +import org.junit.jupiter.api.Test + +@MicronautTest(environments = [Environment.TEST], rebuildContext = true) +class MysqlSourceConfigurationSpecificationTestTest { + @Inject + lateinit var supplier: ConfigurationSpecificationSupplier + + @Test + fun testSchemaViolation() { + Assertions.assertThrows(ConfigErrorException::class.java, supplier::get) + } + + @Test + @Property(name = "airbyte.connector.config.json", value = CONFIG_JSON) + fun testJson() { + val pojo: MsSqlServerSourceConfigurationSpecification = supplier.get() + Assertions.assertEquals("localhost", pojo.host) + Assertions.assertEquals(12345, pojo.port) + Assertions.assertEquals("FOO", pojo.username) + Assertions.assertEquals("BAR", pojo.password) + Assertions.assertEquals("SYSTEM", pojo.database) + val encryption: MsSqlServerEncryptionConfigurationSpecification? = pojo.getEncryptionValue() + Assertions.assertTrue(encryption is MsSqlServerEncryptionDisabledConfigurationSpecification, encryption!!::class.toString()) + /*val tunnelMethod: SshTunnelMethodConfiguration? 
= pojo.getTunnelMethodValue() + Assertions.assertTrue( + tunnelMethod is SshPasswordAuthTunnelMethod, + tunnelMethod!!::class.toString(), + ) + Assertions.assertEquals(60, pojo.checkpointTargetIntervalSeconds) + Assertions.assertEquals(2, pojo.concurrency)*/ + } +} + +const val CONFIG_JSON: String = + """ +{ + "host": "localhost", + "port": 12345, + "username": "FOO", + "password": "BAR", + "database": "SYSTEM", + "ssl_mode": { + "mode": "preferred" + }, + "tunnel_method": { + "tunnel_method": "SSH_PASSWORD_AUTH", + "tunnel_host": "localhost", + "tunnel_port": 2222, + "tunnel_user": "sshuser", + "tunnel_user_password": "***" + }, + "replication_method": { + "method": "STANDARD" + }, + "checkpoint_target_interval_seconds": 60, + "jdbc_url_params": "theAnswerToLiveAndEverything=42&sessionVariables=max_execution_time=10000&foo=bar&", + "concurrency": 2 +} +""" diff --git a/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceConfigurationTest.kt b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceConfigurationTest.kt new file mode 100644 index 0000000000000..e0db31ffa3a24 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceConfigurationTest.kt @@ -0,0 +1,150 @@ +/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
*/ +package io.airbyte.integrations.source.mssql + +import io.airbyte.cdk.ConfigErrorException +import io.airbyte.cdk.command.AIRBYTE_CLOUD_ENV +import io.airbyte.cdk.command.ConfigurationSpecificationSupplier +import io.airbyte.cdk.command.SourceConfigurationFactory +import io.airbyte.cdk.ssh.SshNoTunnelMethod +import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerSourceConfigurationSpecification +import io.micronaut.context.annotation.Property +import io.micronaut.context.env.Environment +import io.micronaut.test.extensions.junit5.annotation.MicronautTest +import jakarta.inject.Inject +import java.time.Duration +import org.junit.jupiter.api.Assertions +import org.junit.jupiter.api.Test + +@MicronautTest(environments = [Environment.TEST, AIRBYTE_CLOUD_ENV], rebuildContext = true) +class MysqlSourceConfigurationTest { + @Inject + lateinit var pojoSupplier: + ConfigurationSpecificationSupplier + + @Inject + lateinit var factory: + SourceConfigurationFactory + + @Test + @Property(name = "airbyte.connector.config.host", value = "localhost") + @Property(name = "airbyte.connector.config.port", value = "12345") + @Property(name = "airbyte.connector.config.username", value = "FOO") + @Property(name = "airbyte.connector.config.password", value = "BAR") + @Property(name = "airbyte.connector.config.database", value = "SYSTEM") + @Property(name = "airbyte.connector.config.ssl_mode.mode", value = "required") + @Property( + name = "airbyte.connector.config.jdbc_url_params", + value = "theAnswerToLiveAndEverything=42&sessionVariables=max_execution_time=10000&foo=bar&" + ) + fun testParseJdbcParameters() { + val pojo: MsSqlServerSourceConfigurationSpecification = pojoSupplier.get() + + val config = factory.makeWithoutExceptionHandling(pojo) + + Assertions.assertEquals(config.realHost, "localhost") + Assertions.assertEquals(config.realPort, 12345) + Assertions.assertEquals(config.namespaces, setOf("SYSTEM")) + Assertions.assertTrue(config.sshTunnel is 
SshNoTunnelMethod) + + Assertions.assertEquals(config.jdbcProperties["user"], "FOO") + Assertions.assertEquals(config.jdbcProperties["password"], "BAR") + + // Make sure we don't accidentally drop the following hardcoded settings for mysql. + Assertions.assertEquals(config.jdbcProperties["useCursorFetch"], "true") + Assertions.assertEquals(config.jdbcProperties["sessionVariables"], "autocommit=0") + + Assertions.assertEquals(config.jdbcProperties["theAnswerToLiveAndEverything"], "42") + Assertions.assertEquals(config.jdbcProperties["foo"], "bar") + } + + @Test + @Property(name = "airbyte.connector.config.host", value = "localhost") + @Property(name = "airbyte.connector.config.port", value = "12345") + @Property(name = "airbyte.connector.config.username", value = "FOO") + @Property(name = "airbyte.connector.config.password", value = "BAR") + @Property(name = "airbyte.connector.config.database", value = "SYSTEM") + fun testAirbyteCloudDeployment() { + val pojo: MsSqlServerSourceConfigurationSpecification = pojoSupplier.get() + Assertions.assertThrows(ConfigErrorException::class.java) { + factory.makeWithoutExceptionHandling(pojo) + } + } + + @Test + @Property(name = "airbyte.connector.config.json", value = CONFIG_V1) + fun testParseConfigFromV1() { + val pojo: MsSqlServerSourceConfigurationSpecification = pojoSupplier.get() + + val config = factory.makeWithoutExceptionHandling(pojo) + + Assertions.assertEquals(config.realHost, "localhost") + Assertions.assertEquals(config.realPort, 12345) + Assertions.assertEquals(config.namespaces, setOf("dbo")) + + Assertions.assertEquals(config.jdbcProperties["user"], "FOO") + Assertions.assertEquals(config.jdbcProperties["password"], "BAR") + //Assertions.assertEquals(config.jdbcProperties["sslMode"], "required") + Assertions.assertTrue(config.incrementalReplicationConfiguration is MsSqlServerCdcIncrementalReplicationConfiguration) + + val cdcCursor = config.incrementalReplicationConfiguration as 
MsSqlServerCdcIncrementalReplicationConfiguration + + Assertions.assertEquals(cdcCursor.initialWaitingSeconds, 301) + /*Assertions.assertEquals(cdcCursor.initialLoadTimeout, Duration.ofHours(9)) + Assertions.assertEquals( + cdcCursor.invalidCdcCursorPositionBehavior, + InvalidCdcCursorPositionBehavior.RESET_SYNC + )*/ + + Assertions.assertTrue(config.sshTunnel is SshNoTunnelMethod) + } +} + +const val CONFIG: String = + """ +{ + "host": "localhost", + "port": 12345, + "username": "FOO", + "password": "BAR", + "database": "SYSTEM", + "ssl_mode": { + "mode": "preferred" + }, + "tunnel_method": { + "tunnel_method": "SSH_PASSWORD_AUTH", + "tunnel_host": "localhost", + "tunnel_port": 2222, + "tunnel_user": "sshuser", + "tunnel_user_password": "***" + }, + "replication_method": { + "method": "STANDARD" + }, + "checkpoint_target_interval_seconds": 60, + "jdbc_url_params": "theAnswerToLiveAndEverything=42&sessionVariables=max_execution_time=10000&foo=bar&", + "concurrency": 2 +} +""" + +const val CONFIG_V1: String = + """ +{ + "host": "localhost", + "port": 12345, + "database": "SYSTEM", + "password": "BAR", + "ssl_mode": { + "mode": "required" + }, + "username": "FOO", + "tunnel_method": { + "tunnel_method": "NO_TUNNEL" + }, + "replication_method": { + "method": "CDC", + "initial_waiting_seconds": 301, + "initial_load_timeout_hours": 9, + "invalid_cdc_cursor_position_behavior": "Re-sync data" + } +} +""" diff --git a/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceDatatypeIntegrationTest.kt b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceDatatypeIntegrationTest.kt new file mode 100644 index 0000000000000..0a6e4e78b59b5 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceDatatypeIntegrationTest.kt @@ -0,0 +1,467 @@ +/* Copyright (c) 2024 Airbyte, Inc., all 
rights reserved. */ +package io.airbyte.integrations.source.mssql + +import com.fasterxml.jackson.databind.JsonNode +import io.airbyte.cdk.ClockFactory +import io.airbyte.cdk.command.CliRunner +import io.airbyte.cdk.data.AirbyteSchemaType +import io.airbyte.cdk.data.LeafAirbyteSchemaType +import io.airbyte.cdk.jdbc.JdbcConnectionFactory +import io.airbyte.cdk.output.BufferingOutputConsumer +import io.airbyte.cdk.util.Jsons +import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerSourceConfigurationSpecification +import io.airbyte.protocol.models.v0.AirbyteMessage +import io.airbyte.protocol.models.v0.AirbyteRecordMessage +import io.airbyte.protocol.models.v0.AirbyteStream +import io.airbyte.protocol.models.v0.AirbyteTraceMessage +import io.airbyte.protocol.models.v0.CatalogHelpers +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog +import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream +import io.airbyte.protocol.models.v0.SyncMode +import io.github.oshai.kotlinlogging.KotlinLogging +import java.sql.Connection +import org.junit.jupiter.api.Assertions +import org.junit.jupiter.api.BeforeAll +import org.junit.jupiter.api.DynamicContainer +import org.junit.jupiter.api.DynamicNode +import org.junit.jupiter.api.DynamicTest +import org.junit.jupiter.api.TestFactory +import org.junit.jupiter.api.Timeout +import org.testcontainers.containers.MSSQLServerContainer + +private val log = KotlinLogging.logger {} + +class MsSqlServerSourceDatatypeIntegrationTest { + @TestFactory + @Timeout(300) + fun syncTests(): Iterable { + val discover: DynamicNode = + DynamicTest.dynamicTest("discover") { + Assertions.assertFalse(LazyValues.actualStreams.isEmpty()) + } + val read: DynamicNode = + DynamicTest.dynamicTest("read") { + Assertions.assertFalse(LazyValues.actualReads.isEmpty()) + } + val cases: List = + allStreamNamesAndRecordData.keys.map { streamName: String -> + DynamicContainer.dynamicContainer( + streamName, + listOf( + 
DynamicTest.dynamicTest("discover") { discover(streamName) }, + DynamicTest.dynamicTest("records") { records(streamName) }, + ), + ) + } + return listOf(discover, read) + cases + } + + object LazyValues { + val actualStreams: Map by lazy { + val output: BufferingOutputConsumer = CliRunner.source("discover", config()).run() + output.catalogs().firstOrNull()?.streams?.filterNotNull()?.associateBy { it.name } + ?: mapOf() + } + + val configuredCatalog: ConfiguredAirbyteCatalog by lazy { + val configuredStreams: List = + allStreamNamesAndRecordData.keys + .mapNotNull { actualStreams[it] } + .map(CatalogHelpers::toDefaultConfiguredStream) + for (configuredStream in configuredStreams) { + if (configuredStream.stream.supportedSyncModes.contains(SyncMode.INCREMENTAL)) { + configuredStream.syncMode = SyncMode.INCREMENTAL + } + } + ConfiguredAirbyteCatalog().withStreams(configuredStreams) + } + + val allReadMessages: List by lazy { + CliRunner.source("read", config(), configuredCatalog).run().messages() + } + + val actualReads: Map by lazy { + val result: Map = + allStreamNamesAndRecordData.keys.associateWith { + BufferingOutputConsumer(ClockFactory().fixed()) + } + for (msg in allReadMessages) { + result[streamName(msg) ?: continue]?.accept(msg) + } + result + } + + fun streamName(msg: AirbyteMessage): String? = + when (msg.type) { + AirbyteMessage.Type.RECORD -> msg.record?.stream + AirbyteMessage.Type.STATE -> msg.state?.stream?.streamDescriptor?.name + AirbyteMessage.Type.TRACE -> + when (msg.trace?.type) { + AirbyteTraceMessage.Type.ERROR -> msg.trace?.error?.streamDescriptor?.name + AirbyteTraceMessage.Type.ESTIMATE -> msg.trace?.estimate?.name + AirbyteTraceMessage.Type.STREAM_STATUS -> + msg.trace?.streamStatus?.streamDescriptor?.name + AirbyteTraceMessage.Type.ANALYTICS -> null + null -> null + } + else -> null + } + } + + private fun discover(streamName: String) { + val actualStream: AirbyteStream? 
= LazyValues.actualStreams[streamName] + log.info { "discover result: ${LazyValues.actualStreams}" } + log.info { "streamName: $streamName" } + Assertions.assertNotNull(actualStream) + log.info { + "test case $streamName: discovered stream ${ + Jsons.valueToTree( + actualStream, + ) + }" + } + val testCase: TestCase = + testCases.find { it.streamNamesToRecordData.keys.contains(streamName) }!! + val isIncrementalSupported: Boolean = + actualStream!!.supportedSyncModes.contains(SyncMode.INCREMENTAL) + val jsonSchema: JsonNode = actualStream.jsonSchema?.get("properties")!! + if (streamName == testCase.tableName) { + val actualSchema: JsonNode = jsonSchema[testCase.columnName] + Assertions.assertNotNull(actualSchema) + val expectedSchema: JsonNode = testCase.airbyteSchemaType.asJsonSchema() + Assertions.assertEquals(expectedSchema, actualSchema) + if (testCase.cursor) { + Assertions.assertTrue(isIncrementalSupported) + } else { + Assertions.assertFalse(isIncrementalSupported) + } + } + } + + private fun records(streamName: String) { + val actualRead: BufferingOutputConsumer? = LazyValues.actualReads[streamName] + Assertions.assertNotNull(actualRead) + + fun sortedRecordData(data: List): JsonNode = + Jsons.createArrayNode().apply { addAll(data.sortedBy { it.toString() }) } + + val actualRecords: List = actualRead?.records() ?: listOf() + + val actual: JsonNode = sortedRecordData(actualRecords.mapNotNull { it.data }) + log.info { "test case $streamName: emitted records $actual" } + val expected: JsonNode = sortedRecordData(allStreamNamesAndRecordData[streamName]!!) 
+ + Assertions.assertEquals(expected, actual) + } + + companion object { + lateinit var dbContainer: MsSqlServercontainer + + fun config(): MsSqlServerSourceConfigurationSpecification = + MsSqlServerContainerFactory.config(dbContainer) + + val connectionFactory: JdbcConnectionFactory by lazy { + JdbcConnectionFactory(MsSqlServerSourceConfigurationFactory().make(config())) + } + + val bitValues = + mapOf( + "b'1'" to "true", + "b'0'" to "false", + ) + + val longBitValues = + mapOf( + "b'10101010'" to """-86""", + ) + + val stringValues = + mapOf( + "'abcdef'" to """"abcdef"""", + "'ABCD'" to """"ABCD"""", + "'OXBEEF'" to """"OXBEEF"""", + ) + + val jsonValues = mapOf("""'{"col1": "v1"}'""" to """"{\"col1\": \"v1\"}"""") + + val yearValues = + mapOf( + "1992" to """1992""", + "2002" to """2002""", + "70" to """1970""", + ) + + val decimalValues = + mapOf( + "0.2" to """0.2""", + ) + + val zeroPrecisionDecimalValues = + mapOf( + "2" to """2""", + ) + + val tinyintValues = + mapOf( + "10" to "10", + "4" to "4", + "2" to "2", + ) + + val intValues = + mapOf( + "10" to "10", + "100000000" to "100000000", + "200000000" to "200000000", + ) + + val dateValues = + mapOf( + "'2022-01-01'" to """"2022-01-01"""", + ) + + val timeValues = + mapOf( + "'14:30:00'" to """"14:30:00.000000"""", + ) + + val dateTimeValues = + mapOf( + "'2024-09-13 14:30:00'" to """"2024-09-13T14:30:00.000000"""", + "'2024-09-13T14:40:00+00:00'" to """"2024-09-13T14:40:00.000000"""" + ) + + val timestampValues = + mapOf( + "'2024-09-12 14:30:00'" to """"2024-09-12T14:30:00.000000Z"""", + "CONVERT_TZ('2024-09-12 14:30:00', 'America/Los_Angeles', 'UTC')" to + """"2024-09-12T21:30:00.000000Z"""", + ) + + val booleanValues = + mapOf( + "TRUE" to "true", + "FALSE" to "false", + ) + + val enumValues = + mapOf( + "'a'" to """"a"""", + "'b'" to """"b"""", + "'c'" to """"c"""", + ) + + // Encoded into base64 + val binaryValues = + mapOf( + "X'89504E470D0A1A0A0000000D49484452'" to 
""""iVBORw0KGgoAAAANSUhEUg=="""", + ) + + val testCases: List = + listOf( + TestCase( + "BOOLEAN", + booleanValues, + airbyteSchemaType = LeafAirbyteSchemaType.BOOLEAN, + cursor = false + ), + TestCase( + "VARCHAR(10)", + stringValues, + airbyteSchemaType = LeafAirbyteSchemaType.STRING + ), + TestCase( + "DECIMAL(10,2)", + decimalValues, + airbyteSchemaType = LeafAirbyteSchemaType.NUMBER + ), + TestCase( + "DECIMAL(10,2) UNSIGNED", + decimalValues, + airbyteSchemaType = LeafAirbyteSchemaType.NUMBER + ), + TestCase( + "DECIMAL UNSIGNED", + zeroPrecisionDecimalValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER + ), + TestCase("FLOAT", decimalValues, airbyteSchemaType = LeafAirbyteSchemaType.NUMBER), + TestCase( + "FLOAT(7,4)", + decimalValues, + airbyteSchemaType = LeafAirbyteSchemaType.NUMBER + ), + TestCase( + "FLOAT(53,8)", + decimalValues, + airbyteSchemaType = LeafAirbyteSchemaType.NUMBER + ), + TestCase("DOUBLE", decimalValues, airbyteSchemaType = LeafAirbyteSchemaType.NUMBER), + TestCase( + "DOUBLE UNSIGNED", + decimalValues, + airbyteSchemaType = LeafAirbyteSchemaType.NUMBER + ), + TestCase( + "TINYINT", + tinyintValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER + ), + TestCase( + "TINYINT UNSIGNED", + tinyintValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER + ), + TestCase( + "SMALLINT", + tinyintValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER + ), + TestCase( + "MEDIUMINT", + tinyintValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER + ), + TestCase("BIGINT", intValues, airbyteSchemaType = LeafAirbyteSchemaType.INTEGER), + TestCase( + "SMALLINT UNSIGNED", + tinyintValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER + ), + TestCase( + "MEDIUMINT UNSIGNED", + tinyintValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER + ), + TestCase( + "BIGINT UNSIGNED", + intValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER + ), + TestCase("INT", intValues, airbyteSchemaType = 
LeafAirbyteSchemaType.INTEGER), + TestCase( + "INT UNSIGNED", + intValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER + ), + TestCase("DATE", dateValues, airbyteSchemaType = LeafAirbyteSchemaType.DATE), + TestCase( + "TIMESTAMP", + timestampValues, + airbyteSchemaType = LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE + ), + TestCase( + "DATETIME", + dateTimeValues, + airbyteSchemaType = LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE + ), + TestCase( + "TIME", + timeValues, + airbyteSchemaType = LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE + ), + TestCase("YEAR", yearValues, airbyteSchemaType = LeafAirbyteSchemaType.INTEGER), + TestCase( + "VARBINARY(255)", + binaryValues, + airbyteSchemaType = LeafAirbyteSchemaType.BINARY, + cursor = true, + noPK = false + ), + TestCase( + "BIT", + bitValues, + airbyteSchemaType = LeafAirbyteSchemaType.BOOLEAN, + cursor = false + ), + TestCase( + "BIT(8)", + longBitValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER + ), + TestCase( + "JSON", + jsonValues, + airbyteSchemaType = LeafAirbyteSchemaType.STRING, + noPK = true + ), + TestCase( + "ENUM('a', 'b', 'c')", + enumValues, + airbyteSchemaType = LeafAirbyteSchemaType.STRING, + noPK = true + ), + ) + + val allStreamNamesAndRecordData: Map> = + testCases.flatMap { it.streamNamesToRecordData.toList() }.toMap() + + @JvmStatic + @BeforeAll + @Timeout(value = 300) + fun startAndProvisionTestContainer() { + dbContainer = + MsSqlServerContainerFactory.exclusive( + MsSqlServerImage.SQLSERVER_2022, + MsSqlServerContainerFactory.WithNetwork, + ) + connectionFactory + .get() + .also { it.isReadOnly = false } + .use { connection: Connection -> + for (case in testCases) { + for (sql in case.sqlStatements) { + log.info { "test case ${case.id}: executing $sql" } + connection.createStatement().use { stmt -> stmt.execute(sql) } + } + } + } + } + } + + data class TestCase( + val sqlType: String, + val sqlToAirbyte: Map, + val airbyteSchemaType: AirbyteSchemaType = 
LeafAirbyteSchemaType.STRING, + val cursor: Boolean = true, + val noPK: Boolean = false, + val customDDL: List? = null, + ) { + val id: String + get() = + sqlType + .replace("[^a-zA-Z0-9]".toRegex(), " ") + .trim() + .replace(" +".toRegex(), "_") + .lowercase() + + val tableName: String + get() = "tbl_$id" + + val columnName: String + get() = "col_$id" + + val sqlStatements: List + get() { + val ddl: List = + listOf( + "CREATE DATABASE IF NOT EXISTS test", + "USE test", + "CREATE TABLE IF NOT EXISTS $tableName " + + "($columnName $sqlType ${if (noPK) "" else "PRIMARY KEY"})", + "TRUNCATE TABLE $tableName", + ) + val dml: List = + sqlToAirbyte.keys.map { "INSERT INTO $tableName ($columnName) VALUES ($it)" } + + return ddl + dml + } + + val streamNamesToRecordData: Map> + get() { + val recordData: List = + sqlToAirbyte.values.map { Jsons.readTree("""{"${columnName}":$it}""") } + return mapOf(tableName to recordData) + } + } +} diff --git a/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceSelectQueryGeneratorTest.kt b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceSelectQueryGeneratorTest.kt new file mode 100644 index 0000000000000..4f3bab45c509a --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceSelectQueryGeneratorTest.kt @@ -0,0 +1,141 @@ +/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
*/ +package io.airbyte.integrations.source.mssql + +import com.fasterxml.jackson.databind.JsonNode +import io.airbyte.cdk.discover.Field +import io.airbyte.cdk.jdbc.DoubleFieldType +import io.airbyte.cdk.jdbc.IntFieldType +import io.airbyte.cdk.jdbc.LongFieldType +import io.airbyte.cdk.jdbc.LosslessJdbcFieldType +import io.airbyte.cdk.jdbc.OffsetDateTimeFieldType +import io.airbyte.cdk.jdbc.StringFieldType +import io.airbyte.cdk.read.And +import io.airbyte.cdk.read.Equal +import io.airbyte.cdk.read.From +import io.airbyte.cdk.read.Greater +import io.airbyte.cdk.read.LesserOrEqual +import io.airbyte.cdk.read.Limit +import io.airbyte.cdk.read.Or +import io.airbyte.cdk.read.OrderBy +import io.airbyte.cdk.read.SelectColumnMaxValue +import io.airbyte.cdk.read.SelectColumns +import io.airbyte.cdk.read.SelectQuery +import io.airbyte.cdk.read.SelectQuerySpec +import io.airbyte.cdk.read.Where +import io.airbyte.cdk.read.optimize +import io.airbyte.cdk.util.Jsons +import org.junit.jupiter.api.Assertions +import org.junit.jupiter.api.Test + +class MsSqlServerSourceSelectQueryGeneratorTest { + @Test + fun testSelectLimit0() { + SelectQuerySpec( + SelectColumns( + listOf( + Field("k", IntFieldType), + Field("v", StringFieldType), + ), + ), + From("TBL", "SC"), + limit = Limit(0), + ) + .assertSqlEquals("""SELECT TOP 0 k, v FROM SC.TBL""") + } + + @Test + fun testSelectMaxCursor() { + SelectQuerySpec( + SelectColumnMaxValue(Field("ts", OffsetDateTimeFieldType)), + From("TBL", "SC"), + ) + .assertSqlEquals("""SELECT MAX(ts) FROM SC.TBL""") + } + + @Test + fun testSelectForNonResumableInitialSync() { + SelectQuerySpec( + SelectColumns( + listOf( + Field("k", IntFieldType), + Field("v", StringFieldType), + ), + ), + From("TBL", "SC"), + ) + .assertSqlEquals("""SELECT k, v FROM SC.TBL""") + } + + @Test + fun testSelectForResumableInitialSync() { + val k1 = Field("k1", IntFieldType) + val v1 = Jsons.numberNode(10) + val k2 = Field("k2", IntFieldType) + val v2 = Jsons.numberNode(20) + 
val k3 = Field("k3", IntFieldType) + val v3 = Jsons.numberNode(30) + SelectQuerySpec( + SelectColumns(listOf(k1, k2, k3, Field("msg", StringFieldType))), + From("TBL", "SC"), + Where( + Or( + listOf( + And(listOf(Greater(k1, v1))), + And(listOf(Equal(k1, v1), Greater(k2, v2))), + And(listOf(Equal(k1, v1), Equal(k2, v2), Greater(k3, v3))), + ), + ), + ), + OrderBy(listOf(k1, k2, k3)), + Limit(1000), + ) + .assertSqlEquals( + """SELECT TOP 1000 k1, k2, k3, msg FROM """ + + """SC.TBL WHERE (k1 > ?) OR """ + + """((k1 = ?) AND (k2 > ?)) OR """ + + """((k1 = ?) AND (k2 = ?) AND (k3 > ?)) """ + + """ORDER BY k1, k2, k3""", + v1 to IntFieldType, + v1 to IntFieldType, + v2 to IntFieldType, + v1 to IntFieldType, + v2 to IntFieldType, + v3 to IntFieldType, + ) + } + + @Test + fun testSelectForCursorBasedIncrementalSync() { + val c = Field("c", DoubleFieldType) + val lb = Jsons.numberNode(0.5) + val ub = Jsons.numberNode(0.5) + SelectQuerySpec( + SelectColumns(listOf(Field("msg", StringFieldType), c)), + From("TBL", "SC"), + Where(And(listOf(Greater(c, lb), LesserOrEqual(c, ub)))), + OrderBy(listOf(c)), + Limit(1000), + ) + .assertSqlEquals( + """SELECT TOP 1000 msg, c FROM """ + + """SC.TBL """ + + """WHERE (c > ?) AND (c <= ?) 
ORDER BY c""", + lb to DoubleFieldType, + ub to DoubleFieldType, + ) + } + + private fun SelectQuerySpec.assertSqlEquals( + sql: String, + vararg bindings: Pair>, + ) { + val expected = + SelectQuery( + sql, + select.columns, + bindings.map { SelectQuery.Binding(it.first, it.second) }, + ) + val actual: SelectQuery = MsSqlServerSelectQueryGenerator().generate(this.optimize()) + Assertions.assertEquals(expected, actual) + } +} diff --git a/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSpecIntegrationTest.kt b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSpecIntegrationTest.kt new file mode 100644 index 0000000000000..54db1f9d7ba37 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSpecIntegrationTest.kt @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2024 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql + +import com.fasterxml.jackson.databind.ObjectMapper +import io.airbyte.cdk.command.CliRunner +import io.airbyte.cdk.command.SyncsTestFixture +import io.airbyte.cdk.output.BufferingOutputConsumer +import io.airbyte.cdk.util.Jsons +import io.airbyte.cdk.util.ResourceUtils +import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerSourceConfigurationSpecification +import io.airbyte.protocol.models.Field +import io.airbyte.protocol.models.JsonSchemaType +import io.airbyte.protocol.models.v0.* +import org.junit.jupiter.api.Assertions +import org.junit.jupiter.api.Test +import java.nio.file.Files +import java.nio.file.Paths + + +class MsSqlServerSpecIntegrationTest { + @Test + fun testSpec() { + SyncsTestFixture.testSpec("expected_spec.json") + } + + @Test + fun testCheck() { + val it = MsSqlServerContainerFactory.shared(MsSqlServerImage.SQLSERVER_2022) + SyncsTestFixture.testCheck(MsSqlServerContainerFactory.config(it)) + } + + 
@Test + fun testDiscover() { + val container = MsSqlServerContainerFactory.shared(MsSqlServerImage.SQLSERVER_2022) + val config = MsSqlServerContainerFactory.config(container) + val discoverOutput: BufferingOutputConsumer = CliRunner.source("discover", config).run() + Assertions.assertEquals(listOf(AirbyteCatalog().withStreams(listOf( + AirbyteStream() + .withName("id_name_and_born") + .withJsonSchema(Jsons.readTree("""{"type":"object","properties":{"born":{"type":"string"},"name":{"type":"string"},"id":{"type":"number","airbyte_type":"integer"}}}""")) + .withSupportedSyncModes(listOf(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + .withSourceDefinedCursor(false) + .withNamespace(config.schemas!![0]) + .withSourceDefinedPrimaryKey(listOf(listOf("id"))) + .withIsResumable(true), + AirbyteStream() + .withName("name_and_born") + .withJsonSchema(Jsons.readTree("""{"type":"object","properties":{"born":{"type":"string"},"name":{"type":"string"}}}""")) + .withSupportedSyncModes(listOf(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + .withSourceDefinedCursor(false) + .withNamespace(config.schemas!![0]) + ))), discoverOutput.catalogs()) + } + + @Test + fun testSync() { + val container = MsSqlServerContainerFactory.shared(MsSqlServerImage.SQLSERVER_2022) + val config = MsSqlServerContainerFactory.config(container) + val configuredCatalog = ConfiguredAirbyteCatalog().withStreams( + listOf( + ConfiguredAirbyteStream() + .withSyncMode(SyncMode.INCREMENTAL) + .withDestinationSyncMode(DestinationSyncMode.APPEND) + .withStream( + CatalogHelpers.createAirbyteStream( + "name_and_born", config.schemas!![0], + Field.of("name", JsonSchemaType.STRING), + Field.of("born", JsonSchemaType.STRING) + ) + .withSupportedSyncModes(listOf(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + ), + ConfiguredAirbyteStream() + .withSyncMode(SyncMode.INCREMENTAL) + .withCursorField(listOf("id")) + .withDestinationSyncMode(DestinationSyncMode.APPEND) + .withStream( + CatalogHelpers.createAirbyteStream( + 
"id_name_and_born", config.schemas!![0], + Field.of("id", JsonSchemaType.INTEGER), + Field.of("name", JsonSchemaType.STRING), + Field.of("born", JsonSchemaType.STRING) + ) + .withSupportedSyncModes(listOf(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + ) + ) + ) + + val readOutput: BufferingOutputConsumer = + CliRunner.source("read", config, configuredCatalog, listOf()).run() + //println("SGXX records=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.records())}") + println("SGXX: specs=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.specs())}") + println("SGXX logs=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.logs())}") + println("SGXX states=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.states())}") + println("SGXX statuses=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.statuses())}") + //println("SGXX messages=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.messages())}") + println("SGXX traces=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.traces())}") + //println("SGXX readOutput=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput)}") + + } + + @Test + fun testSync2() { + //val container = MsSqlServerContainerFactory.shared(MsSqlServerContainerFactory.SQLSERVER_2022) + val configString = Files.readString(Paths.get("secrets/config-cdc.json")) + val config = Jsons.readValue( + configString, + MsSqlServerSourceConfigurationSpecification::class.java, + ) + println("SGX config=$configString") + + val catalog = SyncsTestFixture.configuredCatalogFromResource("catalog-cdc-single-stream.json") + CliRunner.source("discover", config).run() + println("SGX catalogString=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(catalog)}") + + + val readOutput: BufferingOutputConsumer = + 
CliRunner.source("read", config, catalog, listOf()).run() + //println("SGXX records=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.records())}") + println("SGXX: specs=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.specs())}") + println("SGXX logs=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.logs())}") + println("SGXX states=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.states())}") + println("SGXX statuses=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.statuses())}") + //println("SGXX messages=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.messages())}") + println("SGXX traces=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.traces())}") + //println("SGXX readOutput=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput)}") + + } + + @Test + fun testSyncWithAlwaysNullCursor() { + //val container = MsSqlServerContainerFactory.shared(MsSqlServerContainerFactory.SQLSERVER_2022) + val configString = Files.readString(Paths.get("secrets/config-cdc.json")) + val config = Jsons.readValue( + configString, + MsSqlServerSourceConfigurationSpecification::class.java, + ) + println("SGX config=$configString") + + val catalog = SyncsTestFixture.configuredCatalogFromResource("catalog-cdc-dbo-users.json") + CliRunner.source("discover", config).run() + println("SGX catalogString=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(catalog)}") + + + val readOutput: BufferingOutputConsumer = + CliRunner.source("read", config, catalog, listOf()).run() + //println("SGXX records=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.records())}") + println("SGXX: specs=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.specs())}") + println("SGXX 
logs=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.logs())}") + println("SGXX states=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.states())}") + println("SGXX statuses=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.statuses())}") + //println("SGXX messages=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.messages())}") + println("SGXX traces=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.traces())}") + //println("SGXX readOutput=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput)}") + + } + + @Test + fun testSyncEmptyTable() { + val configString = Files.readString(Paths.get("secrets/config-cdc.json")) + val config = Jsons.readValue( + configString, + MsSqlServerSourceConfigurationSpecification::class.java, + ) + val configuredCatalog = ConfiguredAirbyteCatalog().withStreams( + listOf( + ConfiguredAirbyteStream() + .withSyncMode(SyncMode.INCREMENTAL) + .withDestinationSyncMode(DestinationSyncMode.APPEND) + .withCursorField(listOf("born")) + .withPrimaryKey(listOf(listOf("born"))) + .withStream( + CatalogHelpers.createAirbyteStream( + "name_born", "dbo", + Field.of("name", JsonSchemaType.STRING), + Field.of("born", JsonSchemaType.STRING) + ) + .withSupportedSyncModes(listOf(SyncMode.INCREMENTAL)) + ) + ) + ) + CliRunner.source("discover", config).run() + println("SGX catalogString=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(configuredCatalog)}") + + val readOutput: BufferingOutputConsumer = CliRunner.source("read", config, configuredCatalog, listOf()).run() + println("SGXX messages=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.messages())}") + } +} diff --git a/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlCdcDatatypeIntegrationTest.kt 
b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlCdcDatatypeIntegrationTest.kt new file mode 100644 index 0000000000000..61c9578965fca --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlCdcDatatypeIntegrationTest.kt @@ -0,0 +1,467 @@ +/* + * Copyright (c) 2024 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql + +import com.fasterxml.jackson.databind.JsonNode +import com.fasterxml.jackson.databind.node.ObjectNode +import io.airbyte.cdk.ClockFactory +import io.airbyte.cdk.command.CliRunner +import io.airbyte.cdk.data.AirbyteSchemaType +import io.airbyte.cdk.data.LeafAirbyteSchemaType +import io.airbyte.cdk.jdbc.JdbcConnectionFactory +import io.airbyte.cdk.output.BufferingOutputConsumer +import io.airbyte.cdk.util.Jsons +import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerSourceConfigurationSpecification +import io.airbyte.protocol.models.v0.AirbyteMessage +import io.airbyte.protocol.models.v0.AirbyteRecordMessage +import io.airbyte.protocol.models.v0.AirbyteStream +import io.airbyte.protocol.models.v0.CatalogHelpers +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog +import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream +import io.airbyte.protocol.models.v0.SyncMode +import io.github.oshai.kotlinlogging.KotlinLogging +import java.sql.Connection +import org.junit.jupiter.api.Assertions +import org.junit.jupiter.api.BeforeAll +import org.junit.jupiter.api.DynamicContainer +import org.junit.jupiter.api.DynamicNode +import org.junit.jupiter.api.DynamicTest +import org.junit.jupiter.api.TestFactory +import org.junit.jupiter.api.Timeout +import org.testcontainers.containers.MSSQLServerContainer + +private val log = KotlinLogging.logger {} + +class MysqlCdcDatatypeIntegrationTest { + /* + @TestFactory + @Timeout(300) + fun syncTests(): Iterable { + val read: DynamicNode = + 
DynamicTest.dynamicTest("read") { + Assertions.assertFalse(LazyValues.actualReads.isEmpty()) + } + val cases: List = + allStreamNamesAndRecordData.keys.map { streamName: String -> + DynamicContainer.dynamicContainer( + streamName, + listOf( + DynamicTest.dynamicTest("records") { records(streamName) }, + ), + ) + } + return listOf(read) + cases + } + + object LazyValues { + val actualStreams: Map by lazy { + val output: BufferingOutputConsumer = CliRunner.source("discover", config()).run() + output.catalogs().firstOrNull()?.streams?.filterNotNull()?.associateBy { it.name } + ?: mapOf() + } + + val configuredCatalog: ConfiguredAirbyteCatalog by lazy { + val configuredStreams: List = + allStreamNamesAndRecordData.keys + .mapNotNull { actualStreams[it] } + .map { + CatalogHelpers.toDefaultConfiguredStream(it) + .withCursorField( + listOf(MysqlCdcMetaFields.CDC_CURSOR.id), + ) + } + + for (configuredStream in configuredStreams) { + if (configuredStream.stream.supportedSyncModes.contains(SyncMode.INCREMENTAL)) { + configuredStream.syncMode = SyncMode.INCREMENTAL + } + } + ConfiguredAirbyteCatalog().withStreams(configuredStreams) + } + + val allReadMessages: List by lazy { + // only get messsages from the 2nd run + val lastStateMessageFromFirstRun = + CliRunner.source("read", config(), configuredCatalog).run().states().last() + + // insert + connectionFactory + .get() + .also { it.isReadOnly = false } + .use { connection: Connection -> + for (case in testCases) { + for (sql in case.sqlInsertStatements) { + log.info { "test case ${case.id}: executing $sql" } + connection.createStatement().use { stmt -> stmt.execute(sql) } + } + } + } + + // Run it in dbz mode on 2nd time: + CliRunner.source( + "read", + config(), + configuredCatalog, + listOf(lastStateMessageFromFirstRun) + ) + .run() + .messages() + } + + val actualReads: Map by lazy { + val result: Map = + allStreamNamesAndRecordData.keys.associateWith { + BufferingOutputConsumer(ClockFactory().fixed()) + } + for (msg in 
allReadMessages) { + result[streamName(msg) ?: continue]?.accept(msg) + } + result + } + + fun streamName(msg: AirbyteMessage): String? = + when (msg.type) { + AirbyteMessage.Type.RECORD -> msg.record?.stream + else -> null + } + } + + private fun records(streamName: String) { + val actualRead: BufferingOutputConsumer? = LazyValues.actualReads[streamName] + Assertions.assertNotNull(actualRead) + + fun sortedRecordData(data: List): JsonNode = + Jsons.createArrayNode().apply { addAll(data.sortedBy { it.toString() }) } + + val actualRecords: List = actualRead?.records() ?: listOf() + + val records = actualRecords.mapNotNull { it.data } + + records.forEach { jsonNode -> + if (jsonNode is ObjectNode) { + // Remove unwanted fields + jsonNode.remove("_ab_cdc_updated_at") + jsonNode.remove("_ab_cdc_deleted_at") + jsonNode.remove("_ab_cdc_cursor") + jsonNode.remove("_ab_cdc_log_file") + jsonNode.remove("_ab_cdc_log_pos") + } + } + val actual: JsonNode = sortedRecordData(records) + + log.info { "test case $streamName: emitted records $actual" } + val expected: JsonNode = sortedRecordData(allStreamNamesAndRecordData[streamName]!!) 
+ + Assertions.assertEquals(expected, actual) + } + + companion object { + lateinit var dbContainer: MSSQLServerContainer<*> + + fun config(): MsSqlServerSourceConfigurationSpecification = + MsSqlServerContainerFactory.cdcConfig(dbContainer) + + val connectionFactory: JdbcConnectionFactory by lazy { + JdbcConnectionFactory(MsSqlServerSourceConfigurationFactory().make(config())) + } + + val bitValues = + mapOf( + "b'1'" to "true", + "b'0'" to "false", + ) + + val longBitValues = + mapOf( + "b'10101010'" to """"qg=="""", + ) + + val stringValues = + mapOf( + "'abcdef'" to """"abcdef"""", + "'ABCD'" to """"ABCD"""", + "'OXBEEF'" to """"OXBEEF"""", + ) + + val yearValues = + mapOf( + "1992" to """1992""", + "2002" to """2002""", + "70" to """1970""", + ) + + val precisionTwoDecimalValues = + mapOf( + "0.2" to """0.2""", + ) + + val floatValues = + mapOf( + "123.4567" to """123.4567""", + ) + + val zeroPrecisionDecimalValues = + mapOf( + "2" to """2.0""", + ) + + val tinyintValues = + mapOf( + "10" to "10", + "4" to "4", + "2" to "2", + ) + + val intValues = + mapOf( + "10" to "10", + "100000000" to "100000000", + "200000000" to "200000000", + ) + + val dateValues = + mapOf( + "'2022-01-01'" to """"2022-01-01"""", + ) + + val timeValues = + mapOf( + "'14:30:00'" to """"14:30:00.000000"""", + ) + + val dateTimeValues = + mapOf( + "'2024-09-13 14:30:00'" to """"2024-09-13T14:30:00.000000"""", + "'2024-09-13T14:40:00+00:00'" to """"2024-09-13T14:40:00.000000"""", + ) + + val timestampValues = + mapOf( + "'2024-09-12 14:30:00'" to """"2024-09-12T14:30:00.000000Z"""", + "CONVERT_TZ('2024-09-12 14:30:00', 'America/Los_Angeles', 'UTC')" to + """"2024-09-12T21:30:00.000000Z"""", + ) + + val booleanValues = + mapOf( + "TRUE" to "true", + "FALSE" to "false", + ) + + val testCases: List = + listOf( + TestCase( + "BOOLEAN", + booleanValues, + airbyteSchemaType = LeafAirbyteSchemaType.BOOLEAN, + cursor = false, + ), + TestCase( + "VARCHAR(10)", + stringValues, + airbyteSchemaType = 
LeafAirbyteSchemaType.STRING, + ), + TestCase( + "DECIMAL(10,2)", + precisionTwoDecimalValues, + airbyteSchemaType = LeafAirbyteSchemaType.NUMBER, + ), + TestCase( + "DECIMAL(10,2) UNSIGNED", + precisionTwoDecimalValues, + airbyteSchemaType = LeafAirbyteSchemaType.NUMBER, + ), + TestCase( + "DECIMAL UNSIGNED", + zeroPrecisionDecimalValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER, + ), + TestCase( + "FLOAT", + precisionTwoDecimalValues, + airbyteSchemaType = LeafAirbyteSchemaType.NUMBER + ), + TestCase( + "FLOAT(7,4)", + floatValues, + airbyteSchemaType = LeafAirbyteSchemaType.NUMBER, + ), + TestCase( + "FLOAT(53,8)", + floatValues, + airbyteSchemaType = LeafAirbyteSchemaType.NUMBER, + ), + TestCase( + "DOUBLE", + precisionTwoDecimalValues, + airbyteSchemaType = LeafAirbyteSchemaType.NUMBER + ), + TestCase( + "DOUBLE UNSIGNED", + precisionTwoDecimalValues, + airbyteSchemaType = LeafAirbyteSchemaType.NUMBER, + ), + TestCase( + "TINYINT", + tinyintValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER, + ), + TestCase( + "TINYINT UNSIGNED", + tinyintValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER, + ), + TestCase( + "SMALLINT", + tinyintValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER, + ), + TestCase( + "MEDIUMINT", + tinyintValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER, + ), + TestCase("BIGINT", intValues, airbyteSchemaType = LeafAirbyteSchemaType.INTEGER), + TestCase( + "SMALLINT UNSIGNED", + tinyintValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER, + ), + TestCase( + "MEDIUMINT UNSIGNED", + tinyintValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER, + ), + TestCase( + "BIGINT UNSIGNED", + intValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER, + ), + TestCase("INT", intValues, airbyteSchemaType = LeafAirbyteSchemaType.INTEGER), + TestCase( + "INT UNSIGNED", + intValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER, + ), + TestCase("DATE", dateValues, airbyteSchemaType = 
LeafAirbyteSchemaType.DATE), + TestCase( + "TIMESTAMP", + timestampValues, + airbyteSchemaType = LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE, + ), + TestCase( + "DATETIME", + dateTimeValues, + airbyteSchemaType = LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE, + ), + TestCase( + "TIME", + timeValues, + airbyteSchemaType = LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE, + ), + TestCase("YEAR", yearValues, airbyteSchemaType = LeafAirbyteSchemaType.INTEGER), + TestCase( + "BIT", + bitValues, + airbyteSchemaType = LeafAirbyteSchemaType.BOOLEAN, + cursor = false, + ), + TestCase( + "BIT(8)", + longBitValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER, + ), + ) + + val allStreamNamesAndRecordData: Map> = + testCases.flatMap { it.streamNamesToRecordData.toList() }.toMap() + + @JvmStatic + @BeforeAll + @Timeout(value = 300) + fun startAndProvisionTestContainer() { + dbContainer = + MsSqlServerContainerFactory.exclusive( + "mysql:8.0", + MsSqlServerContainerFactory.WithNetwork, + ) + + val gtidOn = + "SET @@GLOBAL.ENFORCE_GTID_CONSISTENCY = 'ON';" + + "SET @@GLOBAL.GTID_MODE = 'OFF_PERMISSIVE';" + + "SET @@GLOBAL.GTID_MODE = 'ON_PERMISSIVE';" + + "SET @@GLOBAL.GTID_MODE = 'ON';" + val grant = + "GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT " + + "ON *.* TO '${dbContainer.username}'@'%';" + + dbContainer.execAsRoot(gtidOn) + dbContainer.execAsRoot(grant) + dbContainer.execAsRoot("FLUSH PRIVILEGES;") + connectionFactory + .get() + .also { it.isReadOnly = false } + .use { connection: Connection -> + for (case in testCases) { + for (sql in case.sqlStatements) { + log.info { "test case ${case.id}: executing $sql" } + connection.createStatement().use { stmt -> stmt.execute(sql) } + } + } + } + } + } + + data class TestCase( + val sqlType: String, + val sqlToAirbyte: Map, + val airbyteSchemaType: AirbyteSchemaType = LeafAirbyteSchemaType.STRING, + val cursor: Boolean = true, + val customDDL: List? 
= null, + ) { + val id: String + get() = + sqlType + .replace("[^a-zA-Z0-9]".toRegex(), " ") + .trim() + .replace(" +".toRegex(), "_") + .lowercase() + + val tableName: String + get() = "tbl_$id" + + val columnName: String + get() = "col_$id" + + val sqlStatements: List + get() { + return listOf( + "CREATE DATABASE IF NOT EXISTS test", + "USE test", + "CREATE TABLE IF NOT EXISTS $tableName " + "($columnName $sqlType PRIMARY KEY)", + "TRUNCATE TABLE $tableName", + ) + } + + val sqlInsertStatements: List + get() { + val result = + listOf("USE test;") + + sqlToAirbyte.keys.map { + "INSERT INTO $tableName ($columnName) VALUES ($it)" + } + return result + } + + val streamNamesToRecordData: Map> + get() { + val recordData: List = + sqlToAirbyte.values.map { Jsons.readTree("""{"${columnName}":$it}""") } + return mapOf(tableName to recordData) + } + } + + */ +} diff --git a/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlCdcIntegrationTest.kt b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlCdcIntegrationTest.kt new file mode 100644 index 0000000000000..004ba14bae123 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlCdcIntegrationTest.kt @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mssql + +import io.airbyte.cdk.StreamIdentifier +import io.airbyte.cdk.command.CliRunner +import io.airbyte.cdk.discover.DiscoveredStream +import io.airbyte.cdk.discover.Field +import io.airbyte.cdk.jdbc.IntFieldType +import io.airbyte.cdk.jdbc.JdbcConnectionFactory +import io.airbyte.cdk.jdbc.StringFieldType +import io.airbyte.cdk.output.BufferingOutputConsumer +import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerSourceConfigurationSpecification +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus +import io.airbyte.protocol.models.v0.AirbyteMessage +import io.airbyte.protocol.models.v0.AirbyteStream +import io.airbyte.protocol.models.v0.CatalogHelpers +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog +import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream +import io.airbyte.protocol.models.v0.StreamDescriptor +import io.airbyte.protocol.models.v0.SyncMode +import io.github.oshai.kotlinlogging.KotlinLogging +import java.sql.Connection +import java.sql.Statement +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.BeforeAll +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.Timeout +import org.testcontainers.containers.MSSQLServerContainer + +class MysqlCdcIntegrationTest { +/* + @Test + fun testCheck() { + val run1: BufferingOutputConsumer = CliRunner.source("check", config(), null).run() + + assertEquals(run1.messages().size, 1) + assertEquals( + run1.messages().first().connectionStatus.status, + AirbyteConnectionStatus.Status.SUCCEEDED + ) + + MsSqlServerContainerFactory.exclusive( + imageName = "mysql:8.0", + MsSqlServerContainerFactory.WithCdcOff, + ) + .use { nonCdcDbContainer -> + { + val invalidConfig: MsSqlServerSourceConfigurationSpecification = + MsSqlServerContainerFactory.config(nonCdcDbContainer).apply { + setMethodValue(CdcCursor()) + } + + val nonCdcConnectionFactory = + 
JdbcConnectionFactory(MsSqlServerSourceConfigurationFactory().make(invalidConfig)) + + provisionTestContainer(nonCdcDbContainer, nonCdcConnectionFactory) + + val run2: BufferingOutputConsumer = + CliRunner.source("check", invalidConfig, null).run() + + val messageInRun2 = + run2 + .messages() + .filter { it.type == AirbyteMessage.Type.CONNECTION_STATUS } + .first() + + assertEquals( + AirbyteConnectionStatus.Status.FAILED, + messageInRun2.connectionStatus.status + ) + } + } + } + + @Test + fun test() { + CliRunner.source("read", config(), configuredCatalog).run() + // TODO: add assertions on run1 messages. + + connectionFactory.get().use { connection: Connection -> + connection.isReadOnly = false + connection.createStatement().use { stmt: Statement -> + stmt.execute("INSERT INTO test.tbl (k, v) VALUES (3, 'baz')") + } + } + } + + @Test + fun testFullRefresh() { + val fullRefreshCatalog = + configuredCatalog.apply { streams.forEach { it.syncMode = SyncMode.FULL_REFRESH } } + CliRunner.source("read", config(), fullRefreshCatalog).run() + connectionFactory.get().use { connection: Connection -> + connection.isReadOnly = false + connection.createStatement().use { stmt: Statement -> + stmt.execute("INSERT INTO test.tbl (k, v) VALUES (4, 'baz')") + } + } + } + + companion object { + val log = KotlinLogging.logger {} + lateinit var dbContainer: MSSQLServerContainer<*> + + fun config(): MsSqlServerSourceConfigurationSpecification = + MsSqlServerContainerFactory.config(dbContainer).apply { setMethodValue(CdcCursor()) } + + val connectionFactory: JdbcConnectionFactory by lazy { + JdbcConnectionFactory(MsSqlServerSourceConfigurationFactory().make(config())) + } + + val configuredCatalog: ConfiguredAirbyteCatalog = run { + val desc = StreamDescriptor().withName("tbl").withNamespace("test") + val discoveredStream = + DiscoveredStream( + id = StreamIdentifier.Companion.from(desc), + columns = listOf(Field("k", IntFieldType), Field("v", StringFieldType)), + primaryKeyColumnIDs = 
listOf(listOf("k")), + ) + val stream: AirbyteStream = MsSqlServerStreamFactory().createGlobal(discoveredStream) + val configuredStream: ConfiguredAirbyteStream = + CatalogHelpers.toDefaultConfiguredStream(stream) + .withSyncMode(SyncMode.INCREMENTAL) + .withPrimaryKey(discoveredStream.primaryKeyColumnIDs) + .withCursorField(listOf(MysqlCdcMetaFields.CDC_CURSOR.id)) + ConfiguredAirbyteCatalog().withStreams(listOf(configuredStream)) + } + + @JvmStatic + @BeforeAll + @Timeout(value = 300) + fun startAndProvisionTestContainer() { + dbContainer = + MsSqlServerContainerFactory.exclusive( + imageName = "mysql:8.0", + MsSqlServerContainerFactory.WithNetwork, + ) + provisionTestContainer(dbContainer, connectionFactory) + } + + fun provisionTestContainer( + targetContainer: MSSQLServerContainer<*>, + targetConnectionFactory: JdbcConnectionFactory + ) { + val gtidOn = + "SET @@GLOBAL.ENFORCE_GTID_CONSISTENCY = 'ON';" + + "SET @@GLOBAL.GTID_MODE = 'OFF_PERMISSIVE';" + + "SET @@GLOBAL.GTID_MODE = 'ON_PERMISSIVE';" + + "SET @@GLOBAL.GTID_MODE = 'ON';" + val grant = + "GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT " + + "ON *.* TO '${targetContainer.username}'@'%';" + targetContainer.execAsRoot(gtidOn) + targetContainer.execAsRoot(grant) + targetContainer.execAsRoot("FLUSH PRIVILEGES;") + + targetConnectionFactory.get().use { connection: Connection -> + connection.isReadOnly = false + connection.createStatement().use { stmt: Statement -> + stmt.execute("CREATE TABLE test.tbl(k INT PRIMARY KEY, v VARCHAR(80))") + } + connection.createStatement().use { stmt: Statement -> + stmt.execute("INSERT INTO test.tbl (k, v) VALUES (1, 'foo'), (2, 'bar')") + } + } + } + } + + */ +} diff --git a/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlJdbcPartitionFactoryTest.kt b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlJdbcPartitionFactoryTest.kt new file 
mode 100644 index 0000000000000..07d3a95bbc595 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlJdbcPartitionFactoryTest.kt @@ -0,0 +1,317 @@ +/* + * Copyright (c) 2024 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql + +import com.fasterxml.jackson.databind.node.BinaryNode +import com.fasterxml.jackson.databind.node.ObjectNode +import io.airbyte.cdk.ClockFactory +import io.airbyte.cdk.StreamIdentifier +import io.airbyte.cdk.command.OpaqueStateValue +import io.airbyte.cdk.discover.Field +import io.airbyte.cdk.discover.MetaField +import io.airbyte.cdk.discover.MetaFieldDecorator +import io.airbyte.cdk.jdbc.BinaryStreamFieldType +import io.airbyte.cdk.jdbc.DefaultJdbcConstants +import io.airbyte.cdk.jdbc.IntFieldType +import io.airbyte.cdk.jdbc.OffsetDateTimeFieldType +import io.airbyte.cdk.output.BufferingOutputConsumer +import io.airbyte.cdk.read.* +import io.airbyte.cdk.util.Jsons +import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerSourceConfigurationSpecification +import io.airbyte.protocol.models.v0.StreamDescriptor +import io.mockk.mockk +import java.time.OffsetDateTime +import java.util.Base64 +import kotlin.test.assertEquals +import kotlin.test.assertNull +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertTrue +import org.junit.jupiter.api.Test + +class MysqlJdbcPartitionFactoryTest { + /* + companion object { + private val selectQueryGenerator = MsSqlServerSelectQueryGenerator() + private val sharedState = sharedState() + private val cdcSharedState = sharedState(global = true) + private val config = mockk() + + val mysqlJdbcPartitionFactory = + DefaultJdbcPartitionFactory(sharedState, selectQueryGenerator, config) + val mysqlCdcJdbcPartitionFactory = + DefaultJdbcPartitionFactory(cdcSharedState, selectQueryGenerator, config) + + val fieldId = Field("id", IntFieldType) + val stream 
= + Stream( + id = + StreamIdentifier.from( + StreamDescriptor().withNamespace("test").withName("stream1") + ), + schema = setOf(fieldId), + configuredSyncMode = ConfiguredSyncMode.INCREMENTAL, + configuredPrimaryKey = listOf(fieldId), + configuredCursor = fieldId, + ) + val timestampFieldId = Field("id2", OffsetDateTimeFieldType) + + val timestampStream = + Stream( + id = + StreamIdentifier.from( + StreamDescriptor().withNamespace("test").withName("stream2") + ), + schema = setOf(timestampFieldId), + configuredSyncMode = ConfiguredSyncMode.INCREMENTAL, + configuredPrimaryKey = listOf(timestampFieldId), + configuredCursor = timestampFieldId, + ) + + val binaryFieldId = Field("id3", BinaryStreamFieldType) + + val binaryStream = + Stream( + id = + StreamIdentifier.from( + StreamDescriptor().withNamespace("test").withName("stream3") + ), + schema = setOf(binaryFieldId), + configuredSyncMode = ConfiguredSyncMode.INCREMENTAL, + configuredPrimaryKey = listOf(binaryFieldId), + configuredCursor = binaryFieldId, + ) + + private fun sharedState( + global: Boolean = false, + ): DefaultJdbcSharedState { + + val configSpec = + MsSqlServerSourceConfigurationSpecification().apply { + host = "" + port = 0 + username = "foo" + password = "bar" + database = "localhost" + } + if (global) { + configSpec.setMethodValue(CdcCursor()) + } else { + configSpec.setMethodValue(UserDefinedCursor) + } + val configFactory = MsSqlServerSourceConfigurationFactory() + val configuration = configFactory.make(configSpec) + + val mockSelectQuerier = mockk() + + return DefaultJdbcSharedState( + configuration, + mockSelectQuerier, + DefaultJdbcConstants(), + ConcurrencyResource(configuration), + NoOpGlobalLockResource() + ) + } + + private fun streamFeedBootstrap( + stream: Stream, + incumbentStateValue: OpaqueStateValue? 
= null + ) = + StreamFeedBootstrap( + outputConsumer = BufferingOutputConsumer(ClockFactory().fixed()), + metaFieldDecorator = + object : MetaFieldDecorator { + override val globalCursor: MetaField? = null + override val globalMetaFields: Set = emptySet() + + override fun decorateRecordData( + timestamp: OffsetDateTime, + globalStateValue: OpaqueStateValue?, + stream: Stream, + recordData: ObjectNode + ) {} + }, + stateQuerier = + object : StateQuerier { + override val feeds: List = listOf(stream) + override fun current(feed: Feed): OpaqueStateValue? = + if (feed == stream) incumbentStateValue else null + }, + stream, + ) + } + + @Test + fun testColdStartWithPkCursorBased() { + val jdbcPartition = mysqlJdbcPartitionFactory.create(streamFeedBootstrap(stream)) + assertTrue(jdbcPartition is MysqlJdbcSnapshotWithCursorPartition) + } + + @Test + fun testColdStartWithPkCdc() { + val jdbcPartition = mysqlCdcJdbcPartitionFactory.create(streamFeedBootstrap(stream)) + assertTrue(jdbcPartition is MysqlJdbcCdcSnapshotPartition) + } + + @Test + fun testColdStartWithoutPk() { + val streamWithoutPk = + Stream( + id = + StreamIdentifier.from( + StreamDescriptor().withNamespace("test").withName("stream7") + ), + schema = setOf(fieldId), + configuredSyncMode = ConfiguredSyncMode.INCREMENTAL, + configuredPrimaryKey = listOf(), + configuredCursor = fieldId, + ) + val jdbcPartition = mysqlJdbcPartitionFactory.create(streamFeedBootstrap(streamWithoutPk)) + assertTrue(jdbcPartition is MysqlJdbcNonResumableSnapshotWithCursorPartition) + } + + @Test + fun testResumeFromCompletedCursorBasedRead() { + val incomingStateValue: OpaqueStateValue = + Jsons.readTree( + """ + { + "cursor": "2", + "version": 2, + "state_type": "cursor_based", + "stream_name": "stream1", + "cursor_field": [ + "id" + ], + "stream_namespace": "test", + "cursor_record_count": 1 + } + """.trimIndent() + ) + + val jdbcPartition = + mysqlJdbcPartitionFactory.create(streamFeedBootstrap(stream, incomingStateValue)) + 
assertTrue(jdbcPartition is MysqlJdbcCursorIncrementalPartition) + } + + @Test + fun testResumeFromCompletedCursorBasedReadTimestamp() { + val incomingStateValue: OpaqueStateValue = + Jsons.readTree( + """ + { + "cursor": "2025-09-03T05:23:35", + "version": 2, + "state_type": "cursor_based", + "stream_name": "stream2", + "cursor_field": [ + "id2" + ], + "stream_namespace": "test", + "cursor_record_count": 1 + } + """.trimIndent() + ) + + val jdbcPartition = + mysqlJdbcPartitionFactory.create( + streamFeedBootstrap(timestampStream, incomingStateValue) + ) + assertTrue(jdbcPartition is MysqlJdbcCursorIncrementalPartition) + + assertEquals( + Jsons.valueToTree("2025-09-02T05:23:35.000000Z"), + (jdbcPartition as MysqlJdbcCursorIncrementalPartition).cursorLowerBound + ) + } + + @Test + fun testResumeFromCursorBasedReadInitialRead() { + val incomingStateValue: OpaqueStateValue = + Jsons.readTree( + """ + { + "pk_val": "9063170", + "pk_name": "id", + "version": 2, + "state_type": "primary_key", + "incremental_state": {} + } + """.trimIndent() + ) + + val jdbcPartition = + mysqlJdbcPartitionFactory.create(streamFeedBootstrap(stream, incomingStateValue)) + + assertTrue(jdbcPartition is MysqlJdbcSnapshotWithCursorPartition) + } + + @Test + fun testResumeFromCdcInitialRead() { + val incomingStateValue: OpaqueStateValue = + Jsons.readTree( + """ + { + "pk_val": "29999", + "pk_name": "id", + "version": 2, + "state_type": "primary_key", + "incremental_state": {} + } + """.trimIndent() + ) + + val jdbcPartition = + mysqlCdcJdbcPartitionFactory.create(streamFeedBootstrap(stream, incomingStateValue)) + assertTrue(jdbcPartition is MysqlJdbcCdcSnapshotPartition) + } + + @Test + fun testResumeFromCdcInitialReadComplete() { + val incomingStateValue: OpaqueStateValue = + Jsons.readTree( + """ + { + "stream_name": "stream1", + "cursor_field": [], + "stream_namespace": "test" + } + """.trimIndent() + ) + + val jdbcPartition = + 
mysqlCdcJdbcPartitionFactory.create(streamFeedBootstrap(stream, incomingStateValue)) + assertNull(jdbcPartition) + } + + @Test + fun testResumeFromCompletedCursorBasedReadBinary() { + val incomingStateValue: OpaqueStateValue = + Jsons.readTree( + """ + { + "cursor": "OQAAAAAAAAAAAAAAAAAAAA==", + "version": 2, + "state_type": "cursor_based", + "stream_name": "stream3", + "cursor_field": [ + "id3" + ], + "stream_namespace": "test", + "cursor_record_count": 1 + } + """.trimIndent() + ) + + val jdbcPartition = + mysqlJdbcPartitionFactory.create(streamFeedBootstrap(binaryStream, incomingStateValue)) + assertTrue(jdbcPartition is MysqlJdbcCursorIncrementalPartition) + + assertEquals( + Jsons.valueToTree(Base64.getDecoder().decode("OQAAAAAAAAAAAAAAAAAAAA==")), + (jdbcPartition as MysqlJdbcCursorIncrementalPartition).cursorLowerBound + ) + } + */ +} diff --git a/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlSourceTestConfigurationFactory.kt b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlSourceTestConfigurationFactory.kt new file mode 100644 index 0000000000000..3184ebec94c2a --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlSourceTestConfigurationFactory.kt @@ -0,0 +1,29 @@ +/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
*/ +package io.airbyte.integrations.source.mssql + +import io.airbyte.cdk.command.FeatureFlag +import io.airbyte.cdk.command.SourceConfigurationFactory +import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerSourceConfigurationSpecification +import io.micronaut.context.annotation.Primary +import io.micronaut.context.annotation.Requires +import io.micronaut.context.env.Environment +import jakarta.inject.Singleton +import java.time.Duration + +@Singleton +@Requires(env = [Environment.TEST]) +@Primary +class MysqlSourceTestConfigurationFactory(val featureFlags: Set) : + SourceConfigurationFactory { + override fun makeWithoutExceptionHandling( + pojo: MsSqlServerSourceConfigurationSpecification, + ): MsSqlServerSourceConfiguration = + MsSqlServerSourceConfigurationFactory(featureFlags) + .makeWithoutExceptionHandling(pojo) + /*.copy( + maxConcurrency = 1, + checkpointTargetInterval = Duration.ofSeconds(3), + debeziumHeartbeatInterval = Duration.ofMillis(100), + debeziumKeepAliveInterval = Duration.ofSeconds(1), + )*/ +} diff --git a/airbyte-integrations/connectors/source-mssql/src/test/resources/catalog-cdc-dbo-users.json b/airbyte-integrations/connectors/source-mssql/src/test/resources/catalog-cdc-dbo-users.json new file mode 100644 index 0000000000000..a8c31cbee114e --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/test/resources/catalog-cdc-dbo-users.json @@ -0,0 +1,125 @@ +{ + "streams" : [ { + "fields" : [ { + "name" : "occupation", + "type" : "STRING" + }, { + "name" : "gender", + "type" : "STRING" + }, { + "name" : "academic_degree", + "type" : "STRING" + }, { + "name" : "weight", + "type" : "INTEGER" + }, { + "name" : "created_at", + "type" : "STRING" + }, { + "name" : "language", + "type" : "STRING" + }, { + "name" : "telephone", + "type" : "STRING" + }, { + "name" : "title", + "type" : "STRING" + }, { + "name" : "updated_at", + "type" : "TIMESTAMP_WITHOUT_TIMEZONE" + }, { + "name" : "nationality", + "type" : "STRING" + }, { + 
"name" : "blood_type", + "type" : "STRING" + }, { + "name" : "name", + "type" : "STRING" + }, { + "name" : "id", + "type" : "INTEGER" + }, { + "name" : "age", + "type" : "INTEGER" + }, { + "name" : "email", + "type" : "STRING" + }, { + "name" : "height", + "type" : "NUMBER" + } ], + "stream" : { + "name" : "users", + "namespace" : "dbo", + "json_schema" : { + "type" : "object", + "properties" : { + "id" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "age" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "name" : { + "type" : "string" + }, + "email" : { + "type" : "string" + }, + "title" : { + "type" : "string" + }, + "gender" : { + "type" : "string" + }, + "height" : { + "type" : "number" + }, + "weight" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "language" : { + "type" : "string" + }, + "telephone" : { + "type" : "string" + }, + "blood_type" : { + "type" : "string" + }, + "created_at" : { + "type" : "string", + "contentEncoding" : "base64" + }, + "occupation" : { + "type" : "string" + }, + "updated_at" : { + "type" : "string", + "format" : "date-time", + "airbyte_type" : "timestamp_without_timezone" + }, + "nationality" : { + "type" : "string" + }, + "academic_degree" : { + "type" : "string" + } + } + }, + "default_cursor_field" : [ ], + "supported_sync_modes" : [ "full_refresh", "incremental" ], + "source_defined_cursor" : false, + "source_defined_primary_key" : [ ] + }, + "mappers" : [ ], + "sync_mode" : "incremental", + "primary_key" : [ [ "id" ] ], + "cursor_field" : [ "academic_degree" ], + "destination_sync_mode" : "append_dedup" + }] +} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-mssql/src/test/resources/catalog-cdc-single-stream.json b/airbyte-integrations/connectors/source-mssql/src/test/resources/catalog-cdc-single-stream.json new file mode 100644 index 0000000000000..5f6c099c9fa3b --- /dev/null +++ 
b/airbyte-integrations/connectors/source-mssql/src/test/resources/catalog-cdc-single-stream.json @@ -0,0 +1,44 @@ +{ + "streams" : [ { + "fields" : [ { + "name" : "bin", + "type" : "STRING" + }, { + "name" : "id", + "type" : "STRING" + }, { + "name" : "pmid", + "type" : "INTEGER" + } ], + "stream" : { + "name" : "NewTable", + "namespace" : "dbo", + "json_schema" : { + "type" : "object", + "properties" : { + "id" : { + "type" : "string" + }, + "bin" : { + "type" : "string", + "contentEncoding" : "base64" + }, + "pmid" : { + "type" : "number", + "airbyte_type" : "integer" + } + } + }, + "is_resumable" : true, + "default_cursor_field" : [ ], + "supported_sync_modes" : [ "full_refresh", "incremental" ], + "source_defined_cursor" : false, + "source_defined_primary_key" : [ [ "pmid" ] ] + }, + "mappers" : [ ], + "sync_mode" : "incremental", + "primary_key" : [ [ "pmid" ] ], + "cursor_field" : [ "pmid" ], + "destination_sync_mode" : "append_dedup" + } ] +} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-mssql/src/test/resources/catalog-cdc.json b/airbyte-integrations/connectors/source-mssql/src/test/resources/catalog-cdc.json new file mode 100644 index 0000000000000..775b9f8eb6142 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/test/resources/catalog-cdc.json @@ -0,0 +1,1033 @@ +{ + "streams" : [ { + "fields" : [ { + "name" : "column_id", + "type" : "INTEGER" + }, { + "name" : "masking_function", + "type" : "STRING" + }, { + "name" : "column_name", + "type" : "STRING" + }, { + "name" : "is_computed", + "type" : "BOOLEAN" + }, { + "name" : "column_type", + "type" : "STRING" + }, { + "name" : "object_id", + "type" : "INTEGER" + }, { + "name" : "column_ordinal", + "type" : "INTEGER" + } ], + "stream" : { + "name" : "captured_columns", + "namespace" : "cdc", + "json_schema" : { + "type" : "object", + "properties" : { + "column_id" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "object_id" : { + "type" : 
"number", + "airbyte_type" : "integer" + }, + "column_name" : { + "type" : "string" + }, + "column_type" : { + "type" : "string" + }, + "is_computed" : { + "type" : "boolean" + }, + "column_ordinal" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "masking_function" : { + "type" : "string" + } + } + }, + "is_resumable" : true, + "default_cursor_field" : [ ], + "supported_sync_modes" : [ "full_refresh", "incremental" ], + "source_defined_cursor" : false, + "source_defined_primary_key" : [ [ "object_id" ], [ "column_ordinal" ] ] + }, + "mappers" : [ ], + "sync_mode" : "full_refresh", + "primary_key" : [ [ "object_id" ], [ "column_ordinal" ] ], + "cursor_field" : [ ], + "destination_sync_mode" : "overwrite" + }, { + "fields" : [ { + "name" : "partition_switch", + "type" : "BOOLEAN" + }, { + "name" : "has_drop_pending", + "type" : "BOOLEAN" + }, { + "name" : "supports_net_changes", + "type" : "BOOLEAN" + }, { + "name" : "version", + "type" : "INTEGER" + }, { + "name" : "object_id", + "type" : "INTEGER" + }, { + "name" : "role_name", + "type" : "STRING" + }, { + "name" : "start_lsn", + "type" : "STRING" + }, { + "name" : "filegroup_name", + "type" : "STRING" + }, { + "name" : "source_object_id", + "type" : "INTEGER" + }, { + "name" : "end_lsn", + "type" : "STRING" + }, { + "name" : "create_date", + "type" : "TIMESTAMP_WITHOUT_TIMEZONE" + }, { + "name" : "index_name", + "type" : "STRING" + }, { + "name" : "capture_instance", + "type" : "STRING" + } ], + "stream" : { + "name" : "change_tables", + "namespace" : "cdc", + "json_schema" : { + "type" : "object", + "properties" : { + "end_lsn" : { + "type" : "string", + "contentEncoding" : "base64" + }, + "version" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "object_id" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "role_name" : { + "type" : "string" + }, + "start_lsn" : { + "type" : "string", + "contentEncoding" : "base64" + }, + "index_name" : { + "type" : "string" + }, + 
"create_date" : { + "type" : "string", + "format" : "date-time", + "airbyte_type" : "timestamp_without_timezone" + }, + "filegroup_name" : { + "type" : "string" + }, + "capture_instance" : { + "type" : "string" + }, + "has_drop_pending" : { + "type" : "boolean" + }, + "partition_switch" : { + "type" : "boolean" + }, + "source_object_id" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "supports_net_changes" : { + "type" : "boolean" + } + } + }, + "is_resumable" : true, + "default_cursor_field" : [ ], + "supported_sync_modes" : [ "full_refresh", "incremental" ], + "source_defined_cursor" : false, + "source_defined_primary_key" : [ [ "object_id" ] ] + }, + "mappers" : [ ], + "sync_mode" : "full_refresh", + "primary_key" : [ [ "object_id" ] ], + "cursor_field" : [ ], + "destination_sync_mode" : "overwrite" + }, { + "fields" : [ { + "name" : "bin", + "type" : "STRING" + }, { + "name" : "id", + "type" : "STRING" + } ], + "stream" : { + "name" : "data_type", + "namespace" : "cdc", + "json_schema" : { + "type" : "object", + "properties" : { + "id" : { + "type" : "string" + }, + "bin" : { + "type" : "string", + "contentEncoding" : "base64" + } + } + }, + "default_cursor_field" : [ ], + "supported_sync_modes" : [ "full_refresh", "incremental" ], + "source_defined_cursor" : false, + "source_defined_primary_key" : [ ] + }, + "mappers" : [ ], + "sync_mode" : "full_refresh", + "primary_key" : [ ], + "cursor_field" : [ ], + "destination_sync_mode" : "overwrite" + }, { + "fields" : [ { + "name" : "__$seqval", + "type" : "STRING" + }, { + "name" : "__$command_id", + "type" : "INTEGER" + }, { + "name" : "bin", + "type" : "STRING" + }, { + "name" : "__$end_lsn", + "type" : "STRING" + }, { + "name" : "__$update_mask", + "type" : "STRING" + }, { + "name" : "__$operation", + "type" : "INTEGER" + }, { + "name" : "id", + "type" : "STRING" + }, { + "name" : "__$start_lsn", + "type" : "STRING" + } ], + "stream" : { + "name" : "dbo_NewTable_CT", + "namespace" : "cdc", + 
"json_schema" : { + "type" : "object", + "properties" : { + "id" : { + "type" : "string" + }, + "bin" : { + "type" : "string", + "contentEncoding" : "base64" + }, + "__$seqval" : { + "type" : "string", + "contentEncoding" : "base64" + }, + "__$end_lsn" : { + "type" : "string", + "contentEncoding" : "base64" + }, + "__$operation" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "__$start_lsn" : { + "type" : "string", + "contentEncoding" : "base64" + }, + "__$command_id" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "__$update_mask" : { + "type" : "string", + "contentEncoding" : "base64" + } + } + }, + "default_cursor_field" : [ ], + "supported_sync_modes" : [ "full_refresh", "incremental" ], + "source_defined_cursor" : false, + "source_defined_primary_key" : [ ] + }, + "mappers" : [ ], + "sync_mode" : "full_refresh", + "primary_key" : [ ], + "cursor_field" : [ ], + "destination_sync_mode" : "overwrite" + }, { + "fields" : [ { + "name" : "__$seqval", + "type" : "STRING" + }, { + "name" : "__$command_id", + "type" : "INTEGER" + }, { + "name" : "FirstName", + "type" : "STRING" + }, { + "name" : "__$end_lsn", + "type" : "STRING" + }, { + "name" : "__$update_mask", + "type" : "STRING" + }, { + "name" : "__$operation", + "type" : "INTEGER" + }, { + "name" : "ID", + "type" : "INTEGER" + }, { + "name" : "LastName", + "type" : "STRING" + }, { + "name" : "Age", + "type" : "INTEGER" + }, { + "name" : "__$start_lsn", + "type" : "STRING" + } ], + "stream" : { + "name" : "dbo_Persons_CT", + "namespace" : "cdc", + "json_schema" : { + "type" : "object", + "properties" : { + "ID" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "Age" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "LastName" : { + "type" : "string" + }, + "FirstName" : { + "type" : "string" + }, + "__$seqval" : { + "type" : "string", + "contentEncoding" : "base64" + }, + "__$end_lsn" : { + "type" : "string", + "contentEncoding" : "base64" + }, + "__$operation" : { 
+ "type" : "number", + "airbyte_type" : "integer" + }, + "__$start_lsn" : { + "type" : "string", + "contentEncoding" : "base64" + }, + "__$command_id" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "__$update_mask" : { + "type" : "string", + "contentEncoding" : "base64" + } + } + }, + "default_cursor_field" : [ ], + "supported_sync_modes" : [ "full_refresh", "incremental" ], + "source_defined_cursor" : false, + "source_defined_primary_key" : [ ] + }, + "mappers" : [ ], + "sync_mode" : "full_refresh", + "primary_key" : [ ], + "cursor_field" : [ ], + "destination_sync_mode" : "overwrite" + }, { + "fields" : [ { + "name" : "__$seqval", + "type" : "STRING" + }, { + "name" : "__$command_id", + "type" : "INTEGER" + }, { + "name" : "__$end_lsn", + "type" : "STRING" + }, { + "name" : "__$update_mask", + "type" : "STRING" + }, { + "name" : "__$operation", + "type" : "INTEGER" + }, { + "name" : "id", + "type" : "INTEGER" + }, { + "name" : "test_column", + "type" : "STRING" + }, { + "name" : "__$start_lsn", + "type" : "STRING" + } ], + "stream" : { + "name" : "dbo_dbo_1_datetimeoffset_CT", + "namespace" : "cdc", + "json_schema" : { + "type" : "object", + "properties" : { + "id" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "__$seqval" : { + "type" : "string", + "contentEncoding" : "base64" + }, + "__$end_lsn" : { + "type" : "string", + "contentEncoding" : "base64" + }, + "test_column" : { + "type" : "string" + }, + "__$operation" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "__$start_lsn" : { + "type" : "string", + "contentEncoding" : "base64" + }, + "__$command_id" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "__$update_mask" : { + "type" : "string", + "contentEncoding" : "base64" + } + } + }, + "default_cursor_field" : [ ], + "supported_sync_modes" : [ "full_refresh", "incremental" ], + "source_defined_cursor" : false, + "source_defined_primary_key" : [ ] + }, + "mappers" : [ ], + "sync_mode" : "full_refresh", + 
"primary_key" : [ ], + "cursor_field" : [ ], + "destination_sync_mode" : "overwrite" + }, { + "fields" : [ { + "name" : "ddl_time", + "type" : "TIMESTAMP_WITHOUT_TIMEZONE" + }, { + "name" : "ddl_command", + "type" : "STRING" + }, { + "name" : "source_object_id", + "type" : "INTEGER" + }, { + "name" : "required_column_update", + "type" : "BOOLEAN" + }, { + "name" : "ddl_lsn", + "type" : "STRING" + }, { + "name" : "object_id", + "type" : "INTEGER" + } ], + "stream" : { + "name" : "ddl_history", + "namespace" : "cdc", + "json_schema" : { + "type" : "object", + "properties" : { + "ddl_lsn" : { + "type" : "string", + "contentEncoding" : "base64" + }, + "ddl_time" : { + "type" : "string", + "format" : "date-time", + "airbyte_type" : "timestamp_without_timezone" + }, + "object_id" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "ddl_command" : { + "type" : "string" + }, + "source_object_id" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "required_column_update" : { + "type" : "boolean" + } + } + }, + "is_resumable" : true, + "default_cursor_field" : [ ], + "supported_sync_modes" : [ "full_refresh", "incremental" ], + "source_defined_cursor" : false, + "source_defined_primary_key" : [ [ "object_id" ], [ "ddl_lsn" ] ] + }, + "mappers" : [ ], + "sync_mode" : "full_refresh", + "primary_key" : [ [ "object_id" ], [ "ddl_lsn" ] ], + "cursor_field" : [ ], + "destination_sync_mode" : "overwrite" + }, { + "fields" : [ { + "name" : "column_id", + "type" : "INTEGER" + }, { + "name" : "index_ordinal", + "type" : "INTEGER" + }, { + "name" : "column_name", + "type" : "STRING" + }, { + "name" : "object_id", + "type" : "INTEGER" + } ], + "stream" : { + "name" : "index_columns", + "namespace" : "cdc", + "json_schema" : { + "type" : "object", + "properties" : { + "column_id" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "object_id" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "column_name" : { + "type" : "string" + }, + 
"index_ordinal" : { + "type" : "number", + "airbyte_type" : "integer" + } + } + }, + "is_resumable" : true, + "default_cursor_field" : [ ], + "supported_sync_modes" : [ "full_refresh", "incremental" ], + "source_defined_cursor" : false, + "source_defined_primary_key" : [ [ "object_id" ], [ "index_ordinal" ], [ "column_id" ] ] + }, + "mappers" : [ ], + "sync_mode" : "full_refresh", + "primary_key" : [ [ "object_id" ], [ "index_ordinal" ], [ "column_id" ] ], + "cursor_field" : [ ], + "destination_sync_mode" : "overwrite" + }, { + "fields" : [ { + "name" : "start_lsn", + "type" : "STRING" + }, { + "name" : "tran_begin_time", + "type" : "TIMESTAMP_WITHOUT_TIMEZONE" + }, { + "name" : "tran_id", + "type" : "STRING" + }, { + "name" : "tran_end_time", + "type" : "TIMESTAMP_WITHOUT_TIMEZONE" + }, { + "name" : "tran_begin_lsn", + "type" : "STRING" + } ], + "stream" : { + "name" : "lsn_time_mapping", + "namespace" : "cdc", + "json_schema" : { + "type" : "object", + "properties" : { + "tran_id" : { + "type" : "string", + "contentEncoding" : "base64" + }, + "start_lsn" : { + "type" : "string", + "contentEncoding" : "base64" + }, + "tran_end_time" : { + "type" : "string", + "format" : "date-time", + "airbyte_type" : "timestamp_without_timezone" + }, + "tran_begin_lsn" : { + "type" : "string", + "contentEncoding" : "base64" + }, + "tran_begin_time" : { + "type" : "string", + "format" : "date-time", + "airbyte_type" : "timestamp_without_timezone" + } + } + }, + "is_resumable" : true, + "default_cursor_field" : [ ], + "supported_sync_modes" : [ "full_refresh", "incremental" ], + "source_defined_cursor" : false, + "source_defined_primary_key" : [ [ "start_lsn" ] ] + }, + "mappers" : [ ], + "sync_mode" : "full_refresh", + "primary_key" : [ [ "start_lsn" ] ], + "cursor_field" : [ ], + "destination_sync_mode" : "overwrite" + }, { + "fields" : [ { + "name" : "bin", + "type" : "STRING" + }, { + "name" : "id", + "type" : "STRING" + }, { + "name" : "pmid", + "type" : "INTEGER" + } ], + 
"stream" : { + "name" : "NewTable", + "namespace" : "dbo", + "json_schema" : { + "type" : "object", + "properties" : { + "id" : { + "type" : "string" + }, + "bin" : { + "type" : "string", + "contentEncoding" : "base64" + }, + "pmid" : { + "type" : "number", + "airbyte_type" : "integer" + } + } + }, + "is_resumable" : true, + "default_cursor_field" : [ ], + "supported_sync_modes" : [ "full_refresh", "incremental" ], + "source_defined_cursor" : false, + "source_defined_primary_key" : [ [ "pmid" ] ] + }, + "mappers" : [ ], + "sync_mode" : "incremental", + "primary_key" : [ [ "pmid" ] ], + "cursor_field" : [ "pmid" ], + "destination_sync_mode" : "append_dedup" + }, { + "fields" : [ { + "name" : "FirstName", + "type" : "STRING" + }, { + "name" : "ID", + "type" : "INTEGER" + }, { + "name" : "LastName", + "type" : "STRING" + }, { + "name" : "Age", + "type" : "INTEGER" + } ], + "stream" : { + "name" : "Persons", + "namespace" : "dbo", + "json_schema" : { + "type" : "object", + "properties" : { + "ID" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "Age" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "LastName" : { + "type" : "string" + }, + "FirstName" : { + "type" : "string" + } + } + }, + "is_resumable" : true, + "default_cursor_field" : [ ], + "supported_sync_modes" : [ "full_refresh", "incremental" ], + "source_defined_cursor" : false, + "source_defined_primary_key" : [ [ "ID" ] ] + }, + "mappers" : [ ], + "sync_mode" : "incremental", + "primary_key" : [ [ "ID" ] ], + "cursor_field" : [ "ID" ], + "destination_sync_mode" : "append_dedup" + }, { + "fields" : [ { + "name" : "d1", + "type" : "TIMESTAMP_WITHOUT_TIMEZONE" + }, { + "name" : "d2", + "type" : "STRING" + }, { + "name" : "d3", + "type" : "STRING" + } ], + "stream" : { + "name" : "datetime", + "namespace" : "dbo", + "json_schema" : { + "type" : "object", + "properties" : { + "d1" : { + "type" : "string", + "format" : "date-time", + "airbyte_type" : "timestamp_without_timezone" + }, + 
"d2" : { + "type" : "string" + }, + "d3" : { + "type" : "string" + } + } + }, + "default_cursor_field" : [ ], + "supported_sync_modes" : [ "full_refresh", "incremental" ], + "source_defined_cursor" : false, + "source_defined_primary_key" : [ ] + }, + "mappers" : [ ], + "sync_mode" : "full_refresh", + "primary_key" : [ ], + "cursor_field" : [ ], + "destination_sync_mode" : "overwrite" + }, { + "fields" : [ { + "name" : "datetime", + "type" : "TIMESTAMP_WITHOUT_TIMEZONE" + }, { + "name" : "id", + "type" : "INTEGER" + }, { + "name" : "ts_time", + "type" : "STRING" + }, { + "name" : "test_column", + "type" : "STRING" + } ], + "stream" : { + "name" : "dbo_1_datetimeoffset", + "namespace" : "dbo", + "json_schema" : { + "type" : "object", + "properties" : { + "id" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "ts_time" : { + "type" : "string", + "contentEncoding" : "base64" + }, + "datetime" : { + "type" : "string", + "format" : "date-time", + "airbyte_type" : "timestamp_without_timezone" + }, + "test_column" : { + "type" : "string" + } + } + }, + "is_resumable" : true, + "default_cursor_field" : [ ], + "supported_sync_modes" : [ "full_refresh", "incremental" ], + "source_defined_cursor" : false, + "source_defined_primary_key" : [ [ "id" ] ] + }, + "mappers" : [ ], + "sync_mode" : "incremental", + "primary_key" : [ [ "id" ] ], + "cursor_field" : [ "id" ], + "destination_sync_mode" : "append_dedup" + }, { + "fields" : [ { + "name" : "tabid", + "type" : "INTEGER" + }, { + "name" : "startlsn", + "type" : "STRING" + }, { + "name" : "endlsn", + "type" : "STRING" + }, { + "name" : "typeid", + "type" : "INTEGER" + } ], + "stream" : { + "name" : "systranschemas", + "namespace" : "dbo", + "json_schema" : { + "type" : "object", + "properties" : { + "tabid" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "endlsn" : { + "type" : "string", + "contentEncoding" : "base64" + }, + "typeid" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "startlsn" 
: { + "type" : "string", + "contentEncoding" : "base64" + } + } + }, + "default_cursor_field" : [ ], + "supported_sync_modes" : [ "full_refresh", "incremental" ], + "source_defined_cursor" : false, + "source_defined_primary_key" : [ ] + }, + "mappers" : [ ], + "sync_mode" : "full_refresh", + "primary_key" : [ ], + "cursor_field" : [ ], + "destination_sync_mode" : "overwrite" + }, { + "fields" : [ { + "name" : "occupation", + "type" : "STRING" + }, { + "name" : "gender", + "type" : "STRING" + }, { + "name" : "academic_degree", + "type" : "STRING" + }, { + "name" : "weight", + "type" : "INTEGER" + }, { + "name" : "created_at", + "type" : "STRING" + }, { + "name" : "language", + "type" : "STRING" + }, { + "name" : "telephone", + "type" : "STRING" + }, { + "name" : "title", + "type" : "STRING" + }, { + "name" : "updated_at", + "type" : "TIMESTAMP_WITHOUT_TIMEZONE" + }, { + "name" : "nationality", + "type" : "STRING" + }, { + "name" : "blood_type", + "type" : "STRING" + }, { + "name" : "name", + "type" : "STRING" + }, { + "name" : "id", + "type" : "INTEGER" + }, { + "name" : "age", + "type" : "INTEGER" + }, { + "name" : "email", + "type" : "STRING" + }, { + "name" : "height", + "type" : "NUMBER" + } ], + "stream" : { + "name" : "users", + "namespace" : "dbo", + "json_schema" : { + "type" : "object", + "properties" : { + "id" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "age" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "name" : { + "type" : "string" + }, + "email" : { + "type" : "string" + }, + "title" : { + "type" : "string" + }, + "gender" : { + "type" : "string" + }, + "height" : { + "type" : "number" + }, + "weight" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "language" : { + "type" : "string" + }, + "telephone" : { + "type" : "string" + }, + "blood_type" : { + "type" : "string" + }, + "created_at" : { + "type" : "string", + "contentEncoding" : "base64" + }, + "occupation" : { + "type" : "string" + }, + "updated_at" : 
{ + "type" : "string", + "format" : "date-time", + "airbyte_type" : "timestamp_without_timezone" + }, + "nationality" : { + "type" : "string" + }, + "academic_degree" : { + "type" : "string" + } + } + }, + "default_cursor_field" : [ ], + "supported_sync_modes" : [ "full_refresh", "incremental" ], + "source_defined_cursor" : false, + "source_defined_primary_key" : [ ] + }, + "mappers" : [ ], + "sync_mode" : "incremental", + "primary_key" : [ [ "id" ] ], + "cursor_field" : [ "academic_degree" ], + "destination_sync_mode" : "append_dedup" + }, { + "fields" : [ { + "name" : "born", + "type" : "STRING" + }, { + "name" : "name", + "type" : "STRING" + }, { + "name" : "id", + "type" : "INTEGER" + } ], + "stream" : { + "name" : "id_name_born", + "namespace" : "no_cdc", + "json_schema" : { + "type" : "object", + "properties" : { + "id" : { + "type" : "number", + "airbyte_type" : "integer" + }, + "born" : { + "type" : "string" + }, + "name" : { + "type" : "string" + } + } + }, + "is_resumable" : true, + "default_cursor_field" : [ ], + "supported_sync_modes" : [ "full_refresh", "incremental" ], + "source_defined_cursor" : false, + "source_defined_primary_key" : [ [ "id" ] ] + }, + "mappers" : [ ], + "sync_mode" : "full_refresh", + "primary_key" : [ [ "id" ] ], + "cursor_field" : [ ], + "destination_sync_mode" : "overwrite" + }, { + "fields" : [ { + "name" : "born", + "type" : "STRING" + }, { + "name" : "name", + "type" : "STRING" + } ], + "stream" : { + "name" : "name_born", + "namespace" : "no_cdc", + "json_schema" : { + "type" : "object", + "properties" : { + "born" : { + "type" : "string" + }, + "name" : { + "type" : "string" + } + } + }, + "default_cursor_field" : [ ], + "supported_sync_modes" : [ "full_refresh", "incremental" ], + "source_defined_cursor" : false, + "source_defined_primary_key" : [ ] + }, + "mappers" : [ ], + "sync_mode" : "full_refresh", + "primary_key" : [ ], + "cursor_field" : [ ], + "destination_sync_mode" : "overwrite" + } ] +} \ No newline at end of 
file diff --git a/airbyte-integrations/connectors/source-mssql/src/test/resources/container-license-acceptance.txt b/airbyte-integrations/connectors/source-mssql/src/test/resources/container-license-acceptance.txt new file mode 100644 index 0000000000000..7f099b0aa4e81 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/test/resources/container-license-acceptance.txt @@ -0,0 +1 @@ +mcr.microsoft.com/mssql/server:2022-latest diff --git a/airbyte-integrations/connectors/source-mssql/src/test/resources/expected_spec.json b/airbyte-integrations/connectors/source-mssql/src/test/resources/expected_spec.json new file mode 100644 index 0000000000000..029f803edce79 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/test/resources/expected_spec.json @@ -0,0 +1,203 @@ +{ + "documentationUrl": "https://docs.airbyte.com/integrations/sources/mssql", + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "MSSQL Source Spec", + "type": "object", + "required": ["host", "port", "database", "username", "password"], + "properties": { + "host": { + "description": "The hostname of the database.", + "title": "Host", + "type": "string", + "order": 0 + }, + "port": { + "description": "The port of the database.", + "title": "Port", + "type": "integer", + "minimum": 0, + "maximum": 65536, + "examples": ["1433"], + "default": 1433, + "order": 1 + }, + "database": { + "description": "The name of the database.", + "title": "Database", + "type": "string", + "examples": ["master"], + "order": 2 + }, + "schemas": { + "title": "Schemas", + "description": "The list of schemas to sync from. Defaults to user. 
Case sensitive.", + "type": "array", + "items": { + "type": "string" + }, + "minItems": 0, + "uniqueItems": true, + "default": ["dbo"], + "order": 3 + }, + "username": { + "description": "The username which is used to access the database.", + "title": "Username", + "type": "string", + "order": 4 + }, + "password": { + "description": "The password associated with the username.", + "title": "Password", + "type": "string", + "airbyte_secret": true, + "order": 5 + }, + "jdbc_url_params": { + "title": "JDBC URL Params", + "description": "Additional properties to pass to the JDBC URL string when connecting to the database formatted as 'key=value' pairs separated by the symbol '&'. (example: key1=value1&key2=value2&key3=value3).", + "type": "string", + "order": 6 + }, + "ssl_method": { + "title": "SSL Method", + "type": "object", + "description": "The encryption method which is used when communicating with the database.", + "order": 7, + "oneOf": [ + { + "type": "object", + "title": "Unencrypted", + "description": "Data transfer will not be encrypted.", + "required": ["ssl_method"], + "properties": { + "ssl_method": { + "type": "string", + "default": "unencrypted", + "enum": ["unencrypted"] + } + }, + "additionalProperties": true + }, + { + "type": "object", + "title": "Encrypted (trust server certificate)", + "description": "Use the certificate provided by the server without verification. 
(For testing purposes only!)", + "required": ["ssl_method"], + "properties": { + "ssl_method": { + "type": "string", + "default": "encrypted_trust_server_certificate", + "enum": ["encrypted_trust_server_certificate"] + } + }, + "additionalProperties": true + }, + { + "type": "object", + "title": "Encrypted (verify certificate)", + "description": "Verify and use the certificate provided by the server.", + "required": ["ssl_method"], + "properties": { + "ssl_method": { + "type": "string", + "default": "encrypted_verify_certificate", + "enum": ["encrypted_verify_certificate"] + }, + "hostNameInCertificate": { + "title": "Host Name In Certificate", + "type": "string", + "description": "Specifies the host name of the server. The value of this property must match the subject property of the certificate.", + "order": 0 + }, + "certificate": { + "title": "Certificate", + "type": "string", + "description": "certificate of the server, or of the CA that signed the server certificate", + "order": 1, + "airbyte_secret": true, + "multiline": true + } + }, + "additionalProperties": true + } + ] + }, + "replication_method": { + "type": "object", + "title": "Update Method", + "description": "Configures how data is extracted from the database.", + "default": "CDC", + "display_type": "radio", + "order": 8, + "oneOf": [ + { + "type": "object", + "title": "Read Changes using Change Data Capture (CDC)", + "description": "Recommended - Incrementally reads new inserts, updates, and deletes using the SQL Server's change data capture feature. This must be enabled on your database.", + "required": ["method"], + "properties": { + "method": { + "type": "string", + "default": "CDC", + "enum": ["CDC"] + }, + "initial_waiting_seconds": { + "type": "integer", + "title": "Initial Waiting Time in Seconds (Advanced)", + "description": "The amount of time the connector will wait when it launches to determine if there is new data to sync or not. Defaults to 300 seconds. 
Valid range: 120 seconds to 3600 seconds.", + "default": 300, + "min": 120, + "max": 3600, + "order": 1 + }, + "invalid_cdc_cursor_position_behavior": { + "type": "string", + "title": "Invalid CDC position behavior (Advanced)", + "description": "Determines whether Airbyte should fail or re-sync data in case of a stale/invalid cursor value into the WAL. If 'Fail sync' is chosen, a user will have to manually reset the connection before being able to continue syncing data. If 'Re-sync data' is chosen, Airbyte will automatically trigger a refresh but could lead to higher cloud costs and data loss.", + "enum": ["Fail sync", "Re-sync data"], + "default": "Fail sync", + "order": 2 + }, + "queue_size": { + "type": "integer", + "title": "Size of the queue (Advanced)", + "description": "The size of the internal queue. This may interfere with memory consumption and efficiency of the connector, please be careful.", + "default": 10000, + "order": 3, + "min": 1000, + "max": 10000 + }, + "initial_load_timeout_hours": { + "type": "integer", + "title": "Initial Load Timeout in Hours (Advanced)", + "description": "The amount of time an initial load is allowed to continue for before catching up on CDC logs.", + "default": 8, + "min": 4, + "max": 24, + "order": 4 + } + }, + "additionalProperties": true + }, + { + "type": "object", + "title": "Scan Changes with User Defined Cursor", + "description": "Incrementally detects new inserts and updates using the cursor column chosen when configuring a connection (e.g. 
created_at, updated_at).", + "required": ["method"], + "properties": { + "method": { + "type": "string", + "default": "STANDARD", + "enum": ["STANDARD"] + } + }, + "additionalProperties": true + } + ] + } + }, + "additionalProperties": true + } +} diff --git a/airbyte-integrations/connectors/source-mysql/src/main/kotlin/io/airbyte/integrations/source/mysql/MysqlSourceConfigurationSpecification.kt b/airbyte-integrations/connectors/source-mysql/src/main/kotlin/io/airbyte/integrations/source/mysql/MysqlSourceConfigurationSpecification.kt index f2741de200530..0154caac32793 100644 --- a/airbyte-integrations/connectors/source-mysql/src/main/kotlin/io/airbyte/integrations/source/mysql/MysqlSourceConfigurationSpecification.kt +++ b/airbyte-integrations/connectors/source-mysql/src/main/kotlin/io/airbyte/integrations/source/mysql/MysqlSourceConfigurationSpecification.kt @@ -98,7 +98,7 @@ class MysqlSourceConfigurationSpecification : ConfigurationSpecification() { @JsonGetter("ssl_mode") @JsonSchemaTitle("Encryption") @JsonPropertyDescription( - "The encryption method with is used when communicating with the database.", + "The encryption method which is used when communicating with the database.", ) @JsonSchemaInject(json = """{"order":8}""") fun getEncryptionValue(): Encryption? = encryptionJson ?: encryption.asEncryption() @@ -321,7 +321,7 @@ data object UserDefinedCursor : CursorMethodConfiguration @JsonSchemaDescription( "Recommended - " + "Incrementally reads new inserts, updates, and deletes using Mysql's change data capture feature. 
This must be enabled on your database.", ) class CdcCursor : CursorMethodConfiguration { diff --git a/airbyte-integrations/connectors/source-mysql/src/test/resources/expected-spec.json b/airbyte-integrations/connectors/source-mysql/src/test/resources/expected-spec.json index 329b2434bd724..1c0fe1cde5c37 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test/resources/expected-spec.json +++ b/airbyte-integrations/connectors/source-mysql/src/test/resources/expected-spec.json @@ -154,7 +154,7 @@ ], "order": 8, "title": "Encryption", - "description": "The encryption method with is used when communicating with the database." + "description": "The encryption method which is used when communicating with the database." }, "username": { "type": "string", @@ -365,7 +365,7 @@ "description": "Enter the configured MySQL server timezone. This should only be done if the configured timezone in your MySQL instance does not conform to IANNA standard." } }, - "description": "Recommended - Incrementally reads new inserts, updates, and deletes using Mysql's change data capture feature. This must be enabled on your database.", + "description": "Recommended - Incrementally reads new inserts, updates, and deletes using Mysql's change data capture feature. This must be enabled on your database.", "additionalProperties": true } ],