From 1058bcce6effa2adc6a189d0825a192712e77c39 Mon Sep 17 00:00:00 2001 From: Stephane Geneix Date: Tue, 12 Nov 2024 11:55:15 -0800 Subject: [PATCH] source-mssql: convert to bulk CDK --- .../connectors/source-mssql-v1/README.md | 33 + .../acceptance-test-config.yml | 9 + .../connectors/source-mssql-v1/build.gradle | 41 + .../source-mssql-v1/gradle.properties | 2 + .../connectors/source-mssql-v1/icon.svg | 1 + .../integration_tests/acceptance.py | 16 + .../integration_tests/seed/basic.sql | 228 ++++ .../integration_tests/seed/full.sql | 320 +++++ .../seed/full_without_nulls.sql | 380 ++++++ .../connectors/source-mssql-v1/metadata.yaml | 66 + .../source/mssql/MsSqlSpecConstants.java | 0 .../MssqlCdcConnectorMetadataInjector.java | 0 .../source/mssql/MssqlCdcHelper.java | 0 .../mssql/MssqlCdcSavedInfoFetcher.java | 0 .../source/mssql/MssqlCdcStateHandler.java | 0 .../source/mssql/MssqlCdcTargetPosition.java | 0 .../source/mssql/MssqlDebeziumConverter.java | 0 .../source/mssql/MssqlQueryUtils.java | 0 .../source/mssql/MssqlSource.java | 0 .../source/mssql/MssqlSourceOperations.java | 0 .../mssql/cdc/MssqlCdcStateConstants.java | 0 .../mssql/cdc/MssqlDebeziumStateUtil.java | 0 .../MssqlCursorBasedStateManager.java | 0 .../initialsync/CdcMetadataInjector.java | 0 .../MssqlInitialLoadGlobalStateManager.java | 0 .../initialsync/MssqlInitialLoadHandler.java | 0 .../MssqlInitialLoadRecordIterator.java | 0 .../MssqlInitialLoadStateManager.java | 0 .../MssqlInitialLoadStreamStateManager.java | 0 .../initialsync/MssqlInitialReadUtil.java | 0 .../mssql/MSSqlSourceExceptionHandler.kt | 0 .../src/main/resources/spec.json | 0 .../AbstractMssqlSourceDatatypeTest.java | 0 .../AbstractSshMssqlSourceAcceptanceTest.java | 0 .../mssql/CdcMssqlSourceAcceptanceTest.java | 0 .../mssql/CdcMssqlSourceDatatypeTest.java | 0 ...ntSslEnabledMssqlSourceAcceptanceTest.java | 0 .../mssql/MssqlSourceAcceptanceTest.java | 0 .../source/mssql/MssqlSourceDatatypeTest.java | 0 
.../mssql/MssqlSourceOperationsTest.java | 0 .../SshKeyMssqlSourceAcceptanceTest.java | 0 .../SshPasswordMssqlSourceAcceptanceTest.java | 0 .../SslEnabledMssqlSourceAcceptanceTest.java | 0 .../resources/dummy_config.json | 0 .../resources/expected_spec.json | 0 .../mssql/FillMsSqlTestDbScriptTest.java | 0 .../mssql/MssqlSourcePerformanceTest.java | 0 .../sql/create_mssql_benchmarks.sql | 0 .../source/mssql/CdcMssqlSourceTest.java | 0 .../source/mssql/CdcMssqlSslSourceTest.java | 0 .../source/mssql/CdcStateCompressionTest.java | 0 .../mssql/CloudDeploymentMssqlTest.java | 0 .../source/mssql/MssqlAgentStateTest.java | 0 .../source/mssql/MssqlCdcHelperTest.java | 0 .../mssql/MssqlDataSourceFactoryTest.java | 0 .../mssql/MssqlDebeziumStateUtilTest.java | 0 .../mssql/MssqlInitialLoadHandlerTest.java | 0 .../mssql/MssqlJdbcSourceAcceptanceTest.java | 0 .../source/mssql/MssqlSourceTest.java | 0 .../source/mssql/MssqlSslSourceTest.java | 0 .../source/mssql/MssqlStressTest.java | 0 .../source/mssql/MsSQLContainerFactory.java | 0 .../source/mssql/MsSQLTestDatabase.java | 0 ...sSqlTestDatabaseWithBackgroundThreads.java | 0 .../connectors/source-mssql/build.gradle | 28 +- .../connectors/source-mssql/metadata.yaml | 12 - ...MsSqlServerCdcInitialSnapshotStateValue.kt | 50 + .../mssql/MsSqlServerDebeziumOperations.kt | 63 + .../mssql/MsSqlServerFieldTypeMapper.kt | 112 ++ .../source/mssql/MsSqlServerJdbcPartition.kt | 351 +++++ .../mssql/MsSqlServerJdbcPartitionFactory.kt | 358 +++++ .../mssql/MsSqlServerJdbcStreamStateValue.kt | 92 ++ .../mssql/MsSqlServerSelectQueryGenerator.kt | 110 ++ .../source/mssql/MsSqlServerSource.kt | 18 + .../mssql/MsSqlServerSourceConfiguration.kt | 102 ++ .../source/mssql/MsSqlServerStreamFactory.kt | 74 ++ ...verEncryptionConfigurationSpecification.kt | 79 ++ ...icationMethodConfigurationSpecification.kt | 96 ++ ...lServerSourceConfigurationSpecification.kt | 120 ++ .../src/main/resources/application.yml | 12 + 
.../mssql/MsSqlServerContainerFactory.kt | 129 ++ .../MsSqlServerCursorBasedIntegrationTest.kt | 187 +++ ...verSourceConfigurationSpecificationTest.kt | 76 ++ .../MsSqlServerSourceConfigurationTest.kt | 155 +++ ...sSqlServerSourceDatatypeIntegrationTest.kt | 466 +++++++ ...SqlServerSourceSelectQueryGeneratorTest.kt | 140 ++ .../mssql/MsSqlServerSpecIntegrationTest.kt | 271 ++++ .../mssql/MysqlCdcDatatypeIntegrationTest.kt | 441 +++++++ .../source/mssql/MysqlCdcIntegrationTest.kt | 147 +++ .../mssql/MysqlJdbcPartitionFactoryTest.kt | 293 ++++ .../MysqlSourceTestConfigurationFactory.kt | 28 + .../test/resources/catalog-cdc-dbo-users.json | 144 ++ .../resources/catalog-cdc-single-stream.json | 50 + .../src/test/resources/catalog-cdc.json | 1174 +++++++++++++++++ .../container-license-acceptance.txt | 1 + .../src/test/resources/expected_spec.json | 203 +++ .../MysqlSourceConfigurationSpecification.kt | 4 +- .../src/test/resources/expected-spec.json | 4 +- 98 files changed, 6652 insertions(+), 34 deletions(-) create mode 100644 airbyte-integrations/connectors/source-mssql-v1/README.md create mode 100644 airbyte-integrations/connectors/source-mssql-v1/acceptance-test-config.yml create mode 100644 airbyte-integrations/connectors/source-mssql-v1/build.gradle create mode 100644 airbyte-integrations/connectors/source-mssql-v1/gradle.properties create mode 100644 airbyte-integrations/connectors/source-mssql-v1/icon.svg create mode 100644 airbyte-integrations/connectors/source-mssql-v1/integration_tests/acceptance.py create mode 100644 airbyte-integrations/connectors/source-mssql-v1/integration_tests/seed/basic.sql create mode 100644 airbyte-integrations/connectors/source-mssql-v1/integration_tests/seed/full.sql create mode 100644 airbyte-integrations/connectors/source-mssql-v1/integration_tests/seed/full_without_nulls.sql create mode 100644 airbyte-integrations/connectors/source-mssql-v1/metadata.yaml rename airbyte-integrations/connectors/{source-mssql => 
source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/MsSqlSpecConstants.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcConnectorMetadataInjector.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcHelper.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcSavedInfoFetcher.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcStateHandler.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcTargetPosition.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/MssqlDebeziumConverter.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/MssqlQueryUtils.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/MssqlSourceOperations.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/cdc/MssqlCdcStateConstants.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/cdc/MssqlDebeziumStateUtil.java (100%) rename airbyte-integrations/connectors/{source-mssql => 
source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/cursor_based/MssqlCursorBasedStateManager.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/initialsync/CdcMetadataInjector.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadGlobalStateManager.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadHandler.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadRecordIterator.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadStateManager.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadStreamStateManager.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialReadUtil.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/kotlin/io/airbyte/integrations/source/mssql/MSSqlSourceExceptionHandler.kt (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/main/resources/spec.json (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractMssqlSourceDatatypeTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractSshMssqlSourceAcceptanceTest.java (100%) rename 
airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceAcceptanceTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceDatatypeTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-integration/java/io/airbyte/integrations/source/mssql/CloudDeploymentSslEnabledMssqlSourceAcceptanceTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceAcceptanceTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceDatatypeTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceOperationsTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-integration/java/io/airbyte/integrations/source/mssql/SshKeyMssqlSourceAcceptanceTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-integration/java/io/airbyte/integrations/source/mssql/SshPasswordMssqlSourceAcceptanceTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-integration/java/io/airbyte/integrations/source/mssql/SslEnabledMssqlSourceAcceptanceTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-integration/resources/dummy_config.json (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-integration/resources/expected_spec.json (100%) rename airbyte-integrations/connectors/{source-mssql => 
source-mssql-v1}/src/test-performance/java/io/airbyte/integrations/source/mssql/FillMsSqlTestDbScriptTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-performance/java/io/airbyte/integrations/source/mssql/MssqlSourcePerformanceTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test-performance/sql/create_mssql_benchmarks.sql (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSslSourceTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test/java/io/airbyte/integrations/source/mssql/CdcStateCompressionTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test/java/io/airbyte/integrations/source/mssql/CloudDeploymentMssqlTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test/java/io/airbyte/integrations/source/mssql/MssqlAgentStateTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test/java/io/airbyte/integrations/source/mssql/MssqlCdcHelperTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test/java/io/airbyte/integrations/source/mssql/MssqlDataSourceFactoryTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test/java/io/airbyte/integrations/source/mssql/MssqlDebeziumStateUtilTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test/java/io/airbyte/integrations/source/mssql/MssqlInitialLoadHandlerTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => 
source-mssql-v1}/src/test/java/io/airbyte/integrations/source/mssql/MssqlJdbcSourceAcceptanceTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test/java/io/airbyte/integrations/source/mssql/MssqlSourceTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test/java/io/airbyte/integrations/source/mssql/MssqlSslSourceTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/test/java/io/airbyte/integrations/source/mssql/MssqlStressTest.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLContainerFactory.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLTestDatabase.java (100%) rename airbyte-integrations/connectors/{source-mssql => source-mssql-v1}/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSqlTestDatabaseWithBackgroundThreads.java (100%) create mode 100644 airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerCdcInitialSnapshotStateValue.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerDebeziumOperations.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerFieldTypeMapper.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerJdbcPartition.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerJdbcPartitionFactory.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerJdbcStreamStateValue.kt create mode 
100644 airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSelectQueryGenerator.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSource.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceConfiguration.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerStreamFactory.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/config_spec/MsSqlServerEncryptionConfigurationSpecification.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/config_spec/MsSqlServerReplicationMethodConfigurationSpecification.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/config_spec/MsSqlServerSourceConfigurationSpecification.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/main/resources/application.yml create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerContainerFactory.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerCursorBasedIntegrationTest.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceConfigurationSpecificationTest.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceConfigurationTest.kt create mode 100644 
airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceDatatypeIntegrationTest.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceSelectQueryGeneratorTest.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSpecIntegrationTest.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlCdcDatatypeIntegrationTest.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlCdcIntegrationTest.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlJdbcPartitionFactoryTest.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlSourceTestConfigurationFactory.kt create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/resources/catalog-cdc-dbo-users.json create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/resources/catalog-cdc-single-stream.json create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/resources/catalog-cdc.json create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/resources/container-license-acceptance.txt create mode 100644 airbyte-integrations/connectors/source-mssql/src/test/resources/expected_spec.json diff --git a/airbyte-integrations/connectors/source-mssql-v1/README.md b/airbyte-integrations/connectors/source-mssql-v1/README.md new file mode 100644 index 000000000000..78a636b36e0f --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql-v1/README.md @@ -0,0 +1,33 @@ +# MsSQL (SQL Server) Source + +## Performance Test + +To run performance tests in commandline: + +```shell +./gradlew 
:airbyte-integrations:connectors:source-mssql:performanceTest [--cpulimit=cpulimit/] [--memorylimit=memorylimit/] +``` + +In pull request: + +```shell +/test-performance connector=connectors/source-mssql [--cpulimit=cpulimit/] [--memorylimit=memorylimit/] +``` + +- `cpulimit`: Limit the number of CPUs. The minimum is `2`. E.g. `--cpulimit=cpulimit/2`. +- `memorylimit`: Limit the size of the memory. Must include the unit at the end (e.g. `MB`, `GB`). The minimum size is `6MB`. E.g. `--memorylimit=memorylimit/4GB`. +- When neither a CPU limit nor a memory limit is provided, the performance tests will run without memory or CPU limitations. The available resources will be bounded by those specified in `ResourceRequirements.java`. + +### Use MsSQL script to populate the benchmark database + +In order to create a database with a certain number of tables, and a certain number of records in each of them, +you need to follow a few simple steps. + +1. Create a new database. +2. Follow the TODOs in [create_mssql_benchmarks.sql](src/test-performance/sql/create_mssql_benchmarks.sql) to change the number of tables, and the number of records of different sizes. +3. Execute the script with your changes for the new database. You can run the script with the `sqlcmd` client: + ```bash + cd airbyte-integrations/connectors/source-mssql-v1 + sqlcmd -S Serverinstance -E -i src/test-performance/sql/create_mssql_benchmarks.sql + ``` +4. After the script finishes its work, you will receive the number of tables specified in the script, with names starting with **test_0** and ending with **test\_(the number of tables minus 1)**. 
diff --git a/airbyte-integrations/connectors/source-mssql-v1/acceptance-test-config.yml b/airbyte-integrations/connectors/source-mssql-v1/acceptance-test-config.yml new file mode 100644 index 000000000000..706d04b5d0b9 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql-v1/acceptance-test-config.yml @@ -0,0 +1,9 @@ +# See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) +# for more information about how to configure these tests +connector_image: airbyte/source-mssql:dev +tests: + spec: + - spec_path: "src/test-integration/resources/expected_spec.json" + config_path: "src/test-integration/resources/dummy_config.json" + backward_compatibility_tests_config: + disable_for_version: "0.4.25" diff --git a/airbyte-integrations/connectors/source-mssql-v1/build.gradle b/airbyte-integrations/connectors/source-mssql-v1/build.gradle new file mode 100644 index 000000000000..14b581fdd24d --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql-v1/build.gradle @@ -0,0 +1,41 @@ +plugins { + id 'airbyte-java-connector' +} + +airbyteJavaConnector { + cdkVersionRequired = '0.45.1' + features = ['db-sources'] + useLocalCdk = false +} + +java { + // TODO: rewrite code to avoid javac warnings in the first place + compileJava { + options.compilerArgs += "-Xlint:-try,-rawtypes" + } + compileTestFixturesJava { + options.compilerArgs += "-Xlint:-this-escape" + } +} + +application { + mainClass = 'io.airbyte.integrations.source.mssql.MssqlSource' + applicationDefaultJvmArgs = ['-XX:+ExitOnOutOfMemoryError', '-XX:MaxRAMPercentage=75.0'] +} + +dependencies { + implementation 'com.microsoft.sqlserver:mssql-jdbc:12.6.1.jre11' + implementation 'io.debezium:debezium-embedded:2.7.1.Final' + implementation 'io.debezium:debezium-connector-sqlserver:2.6.2.Final' + implementation 'org.codehaus.plexus:plexus-utils:3.4.2' + + testFixturesImplementation 'org.testcontainers:mssqlserver:1.19.0' + + 
testImplementation 'org.awaitility:awaitility:4.2.0' + testImplementation 'org.hamcrest:hamcrest-all:1.3' + testImplementation 'org.testcontainers:mssqlserver:1.19.0' +} + +compileKotlin { + +} diff --git a/airbyte-integrations/connectors/source-mssql-v1/gradle.properties b/airbyte-integrations/connectors/source-mssql-v1/gradle.properties new file mode 100644 index 000000000000..9e4d90aa6508 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql-v1/gradle.properties @@ -0,0 +1,2 @@ +testExecutionConcurrency=-1 +JunitMethodExecutionTimeout=5 m \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-mssql-v1/icon.svg b/airbyte-integrations/connectors/source-mssql-v1/icon.svg new file mode 100644 index 000000000000..edcaeb77c8f2 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql-v1/icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-mssql-v1/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-mssql-v1/integration_tests/acceptance.py new file mode 100644 index 000000000000..9e6409236281 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql-v1/integration_tests/acceptance.py @@ -0,0 +1,16 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import pytest + +pytest_plugins = ("connector_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """This fixture is a placeholder for external resources that acceptance test might require.""" + # TODO: setup test dependencies if needed. 
otherwise remove the TODO comments + yield + # TODO: clean up test dependencies diff --git a/airbyte-integrations/connectors/source-mssql-v1/integration_tests/seed/basic.sql b/airbyte-integrations/connectors/source-mssql-v1/integration_tests/seed/basic.sql new file mode 100644 index 000000000000..616bc1b2e897 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql-v1/integration_tests/seed/basic.sql @@ -0,0 +1,228 @@ +CREATE + DATABASE MSSQL_BASIC; + +USE MSSQL_BASIC; + +CREATE + TABLE + dbo.TEST_DATASET( + id INTEGER PRIMARY KEY, + test_column_1 BIGINT, + test_column_10 FLOAT, + test_column_11 REAL, + test_column_12 DATE, + test_column_13 smalldatetime, + test_column_14 datetime, + test_column_15 datetime2, + test_column_16 TIME, + test_column_18 CHAR, + test_column_2 INT, + test_column_20 text, + test_column_21 nchar, + test_column_22 nvarchar(MAX), + test_column_23 ntext, + test_column_25 VARBINARY(3), + test_column_3 SMALLINT, + test_column_4 tinyint, + test_column_6 DECIMAL( + 5, + 2 + ), + test_column_7 NUMERIC + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 1, + - 9223372036854775808, + '123', + '123', + '0001-01-01', + '1900-01-01', + '1753-01-01', + '0001-01-01', + '13:00:01', + 'a', + - 2147483648, + 'a', + 'a', + 'a', + 'a', + CAST( + 'ABC' AS VARBINARY + ), + - 32768, + 0, + 999.33, + '99999' + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 2, + 9223372036854775807, + '1234567890.1234567', + '1234567890.1234567', + '9999-12-31', + '2079-06-06', + '9999-12-31', + '9999-12-31', + '13:00:04Z', + '*', + 2147483647, + 'abc', + '*', + 'abc', + 'abc', + CAST( + 'ABC' AS VARBINARY + ), + 32767, + 255, + 999.33, + '99999' + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 3, + 0, + '1234567890.1234567', + '1234567890.1234567', + '1999-01-08', + '2079-06-06', + '9999-12-31T13:00:04Z', + '9999-12-31T13:00:04.123456Z', + '13:00:04.123456Z', + '*', + 2147483647, + 'Some test text 123$%^&*()_', + N'ї', + N'Миші йдуть на південь, не питай чому;', + 
N'Миші йдуть на південь, не питай чому;', + CAST( + 'ABC' AS VARBINARY + ), + 32767, + 255, + 999.33, + '99999' + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 4, + 0, + '1234567890.1234567', + '1234567890.1234567', + '1999-01-08', + '2079-06-06', + '9999-12-31T13:00:04.123Z', + '9999-12-31T13:00:04.123456Z', + '13:00:04.123456Z', + '*', + 2147483647, + '', + N'ї', + N'櫻花分店', + N'櫻花分店', + CAST( + 'ABC' AS VARBINARY + ), + 32767, + 255, + 999.33, + '99999' + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 5, + 0, + '1234567890.1234567', + '1234567890.1234567', + '1999-01-08', + '2079-06-06', + '9999-12-31T13:00:04.123Z', + '9999-12-31T13:00:04.123456Z', + '13:00:04.123456Z', + '*', + 2147483647, + '', + N'ї', + '', + '', + CAST( + 'ABC' AS VARBINARY + ), + 32767, + 255, + 999.33, + '99999' + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 6, + 0, + '1234567890.1234567', + '1234567890.1234567', + '1999-01-08', + '2079-06-06', + '9999-12-31T13:00:04.123Z', + '9999-12-31T13:00:04.123456Z', + '13:00:04.123456Z', + '*', + 2147483647, + '', + N'ї', + N'\xF0\x9F\x9A\x80', + N'\xF0\x9F\x9A\x80', + CAST( + 'ABC' AS VARBINARY + ), + 32767, + 255, + 999.33, + '99999' + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 7, + 0, + '1234567890.1234567', + '1234567890.1234567', + '1999-01-08', + '2079-06-06', + '9999-12-31T13:00:04.123Z', + '9999-12-31T13:00:04.123456Z', + '13:00:04.123456Z', + '*', + 2147483647, + '', + N'ї', + N'\xF0\x9F\x9A\x80', + N'\xF0\x9F\x9A\x80', + CAST( + 'ABC' AS VARBINARY + ), + 32767, + 255, + 999.33, + '99999' + ); diff --git a/airbyte-integrations/connectors/source-mssql-v1/integration_tests/seed/full.sql b/airbyte-integrations/connectors/source-mssql-v1/integration_tests/seed/full.sql new file mode 100644 index 000000000000..9d7a8a920429 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql-v1/integration_tests/seed/full.sql @@ -0,0 +1,320 @@ +CREATE + DATABASE MSSQL_FULL; + +USE MSSQL_FULL; + +CREATE + TABLE + 
dbo.TEST_DATASET( + id INTEGER PRIMARY KEY, + test_column_1 BIGINT, + test_column_10 FLOAT, + test_column_11 REAL, + test_column_12 DATE, + test_column_13 smalldatetime, + test_column_14 datetime, + test_column_15 datetime2, + test_column_16 TIME, + test_column_17 datetimeoffset, + test_column_18 CHAR, + test_column_19 VARCHAR(MAX) COLLATE Latin1_General_100_CI_AI_SC_UTF8, + test_column_2 INT, + test_column_20 text, + test_column_21 nchar, + test_column_22 nvarchar(MAX), + test_column_23 ntext, + test_column_24 BINARY, + test_column_25 VARBINARY(3), + test_column_26 geometry, + test_column_27 uniqueidentifier, + test_column_28 xml, + test_column_29 geography, + test_column_3 SMALLINT, + test_column_30 hierarchyid, + test_column_31 sql_variant, + test_column_4 tinyint, + test_column_5 bit, + test_column_6 DECIMAL( + 5, + 2 + ), + test_column_7 NUMERIC, + test_column_8 money, + test_column_9 smallmoney + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 1, + - 9223372036854775808, + '123', + '123', + '0001-01-01', + '1900-01-01', + '1753-01-01', + '0001-01-01', + NULL, + '0001-01-10 00:00:00 +01:00', + 'a', + 'a', + NULL, + 'a', + 'a', + 'a', + 'a', + CAST( + 'A' AS BINARY(1) + ), + CAST( + 'ABC' AS VARBINARY + ), + geometry::STGeomFromText( + 'LINESTRING (100 100, 20 180, 180 180)', + 0 + ), + '375CFC44-CAE3-4E43-8083-821D2DF0E626', + '1', + geography::STGeomFromText( + 'LINESTRING(-122.360 47.656, -122.343 47.656 )', + 4326 + ), + NULL, + '/1/1/', + 'a', + NULL, + NULL, + 999.33, + '99999', + NULL, + NULL + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 2, + 9223372036854775807, + '1234567890.1234567', + '1234567890.1234567', + '9999-12-31', + '2079-06-06', + '9999-12-31', + '9999-12-31', + '13:00:01', + '9999-01-10 00:00:00 +01:00', + '*', + 'abc', + - 2147483648, + 'abc', + '*', + 'abc', + 'abc', + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + - 32768, + NULL, + 'abc', + 0, + 0, + NULL, + NULL, + '9990000.3647', + '-214748.3648' + ); + +INSERT + INTO + 
dbo.TEST_DATASET + VALUES( + 3, + 0, + NULL, + NULL, + '1999-01-08', + NULL, + '9999-12-31T13:00:04Z', + '9999-12-31T13:00:04.123456Z', + '13:00:04Z', + NULL, + NULL, + N'Миші йдуть на південь, не питай чому;', + 2147483647, + 'Some test text 123$%^&*()_', + N'ї', + N'Миші йдуть на південь, не питай чому;', + N'Миші йдуть на південь, не питай чому;', + NULL, + NULL, + NULL, + NULL, + '', + NULL, + 32767, + NULL, + N'Миші йдуть на південь, не питай чому;', + 255, + 1, + NULL, + NULL, + NULL, + 214748.3647 + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 4, + NULL, + NULL, + NULL, + NULL, + NULL, + '9999-12-31T13:00:04.123Z', + NULL, + '13:00:04.123456Z', + NULL, + NULL, + N'櫻花分店', + NULL, + '', + NULL, + N'櫻花分店', + N'櫻花分店', + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + N'櫻花分店', + NULL, + 'true', + NULL, + NULL, + NULL, + NULL + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 5, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + '', + NULL, + NULL, + NULL, + '', + '', + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + '', + NULL, + 'false', + NULL, + NULL, + NULL, + NULL + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 6, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 7, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + N'\xF0\x9F\x9A\x80', + NULL, + NULL, + NULL, + N'\xF0\x9F\x9A\x80', + N'\xF0\x9F\x9A\x80', + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + N'\xF0\x9F\x9A\x80', + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + ); diff --git a/airbyte-integrations/connectors/source-mssql-v1/integration_tests/seed/full_without_nulls.sql 
b/airbyte-integrations/connectors/source-mssql-v1/integration_tests/seed/full_without_nulls.sql new file mode 100644 index 000000000000..2b6483f9e569 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql-v1/integration_tests/seed/full_without_nulls.sql @@ -0,0 +1,380 @@ +CREATE + DATABASE MSSQL_FULL_NN; + +USE MSSQL_FULL_NN; + +CREATE + TABLE + dbo.TEST_DATASET( + id INTEGER PRIMARY KEY, + test_column_1 BIGINT, + test_column_10 FLOAT, + test_column_11 REAL, + test_column_12 DATE, + test_column_13 smalldatetime, + test_column_14 datetime, + test_column_15 datetime2, + test_column_16 TIME, + test_column_17 datetimeoffset, + test_column_18 CHAR, + test_column_19 VARCHAR(MAX) COLLATE Latin1_General_100_CI_AI_SC_UTF8, + test_column_2 INT, + test_column_20 text, + test_column_21 nchar, + test_column_22 nvarchar(MAX), + test_column_23 ntext, + test_column_24 BINARY, + test_column_25 VARBINARY(3), + test_column_26 geometry, + test_column_27 uniqueidentifier, + test_column_28 xml, + test_column_29 geography, + test_column_3 SMALLINT, + test_column_30 hierarchyid, + test_column_31 sql_variant, + test_column_4 tinyint, + test_column_5 bit, + test_column_6 DECIMAL( + 5, + 2 + ), + test_column_7 NUMERIC, + test_column_8 money, + test_column_9 smallmoney + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 1, + - 9223372036854775808, + '123', + '123', + '0001-01-01', + '1900-01-01', + '1753-01-01', + '0001-01-01', + '13:00:01', + '0001-01-10 00:00:00 +01:00', + 'a', + 'a', + - 2147483648, + 'a', + 'a', + 'a', + 'a', + CAST( + 'A' AS BINARY(1) + ), + CAST( + 'ABC' AS VARBINARY + ), + geometry::STGeomFromText( + 'LINESTRING (100 100, 20 180, 180 180)', + 0 + ), + '375CFC44-CAE3-4E43-8083-821D2DF0E626', + '1', + geography::STGeomFromText( + 'LINESTRING(-122.360 47.656, -122.343 47.656 )', + 4326 + ), + - 32768, + '/1/1/', + 'a', + 0, + 0, + 999.33, + '99999', + '9990000.3647', + '-214748.3648' + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 2, + 9223372036854775807, 
+ '1234567890.1234567', + '1234567890.1234567', + '9999-12-31', + '2079-06-06', + '9999-12-31', + '9999-12-31', + '13:00:04Z', + '9999-01-10 00:00:00 +01:00', + '*', + 'abc', + 2147483647, + 'abc', + '*', + 'abc', + 'abc', + CAST( + 'A' AS BINARY(1) + ), + CAST( + 'ABC' AS VARBINARY + ), + geometry::STGeomFromText( + 'LINESTRING (100 100, 20 180, 180 180)', + 0 + ), + '375CFC44-CAE3-4E43-8083-821D2DF0E626', + '', + geography::STGeomFromText( + 'LINESTRING(-122.360 47.656, -122.343 47.656 )', + 4326 + ), + 32767, + '/1/1/', + 'abc', + 255, + 1, + 999.33, + '99999', + '9990000.3647', + 214748.3647 + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 3, + 0, + '1234567890.1234567', + '1234567890.1234567', + '1999-01-08', + '2079-06-06', + '9999-12-31T13:00:04Z', + '9999-12-31T13:00:04.123456Z', + '13:00:04.123456Z', + '9999-01-10 00:00:00 +01:00', + '*', + N'Миші йдуть на південь, не питай чому;', + 2147483647, + 'Some test text 123$%^&*()_', + N'ї', + N'Миші йдуть на південь, не питай чому;', + N'Миші йдуть на південь, не питай чому;', + CAST( + 'A' AS BINARY(1) + ), + CAST( + 'ABC' AS VARBINARY + ), + geometry::STGeomFromText( + 'LINESTRING (100 100, 20 180, 180 180)', + 0 + ), + '375CFC44-CAE3-4E43-8083-821D2DF0E626', + '', + geography::STGeomFromText( + 'LINESTRING(-122.360 47.656, -122.343 47.656 )', + 4326 + ), + 32767, + '/1/1/', + N'Миші йдуть на південь, не питай чому;', + 255, + 'true', + 999.33, + '99999', + '9990000.3647', + 214748.3647 + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 4, + 0, + '1234567890.1234567', + '1234567890.1234567', + '1999-01-08', + '2079-06-06', + '9999-12-31T13:00:04.123Z', + '9999-12-31T13:00:04.123456Z', + '13:00:04.123456Z', + '9999-01-10 00:00:00 +01:00', + '*', + N'櫻花分店', + 2147483647, + '', + N'ї', + N'櫻花分店', + N'櫻花分店', + CAST( + 'A' AS BINARY(1) + ), + CAST( + 'ABC' AS VARBINARY + ), + geometry::STGeomFromText( + 'LINESTRING (100 100, 20 180, 180 180)', + 0 + ), + '375CFC44-CAE3-4E43-8083-821D2DF0E626', + '', + 
geography::STGeomFromText( + 'LINESTRING(-122.360 47.656, -122.343 47.656 )', + 4326 + ), + 32767, + '/1/1/', + N'櫻花分店', + 255, + 'false', + 999.33, + '99999', + '9990000.3647', + 214748.3647 + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 5, + 0, + '1234567890.1234567', + '1234567890.1234567', + '1999-01-08', + '2079-06-06', + '9999-12-31T13:00:04.123Z', + '9999-12-31T13:00:04.123456Z', + '13:00:04.123456Z', + '9999-01-10 00:00:00 +01:00', + '*', + '', + 2147483647, + '', + N'ї', + '', + '', + CAST( + 'A' AS BINARY(1) + ), + CAST( + 'ABC' AS VARBINARY + ), + geometry::STGeomFromText( + 'LINESTRING (100 100, 20 180, 180 180)', + 0 + ), + '375CFC44-CAE3-4E43-8083-821D2DF0E626', + '', + geography::STGeomFromText( + 'LINESTRING(-122.360 47.656, -122.343 47.656 )', + 4326 + ), + 32767, + '/1/1/', + '', + 255, + 'false', + 999.33, + '99999', + '9990000.3647', + 214748.3647 + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 6, + 0, + '1234567890.1234567', + '1234567890.1234567', + '1999-01-08', + '2079-06-06', + '9999-12-31T13:00:04.123Z', + '9999-12-31T13:00:04.123456Z', + '13:00:04.123456Z', + '9999-01-10 00:00:00 +01:00', + '*', + N'\xF0\x9F\x9A\x80', + 2147483647, + '', + N'ї', + N'\xF0\x9F\x9A\x80', + N'\xF0\x9F\x9A\x80', + CAST( + 'A' AS BINARY(1) + ), + CAST( + 'ABC' AS VARBINARY + ), + geometry::STGeomFromText( + 'LINESTRING (100 100, 20 180, 180 180)', + 0 + ), + '375CFC44-CAE3-4E43-8083-821D2DF0E626', + '', + geography::STGeomFromText( + 'LINESTRING(-122.360 47.656, -122.343 47.656 )', + 4326 + ), + 32767, + '/1/1/', + N'\xF0\x9F\x9A\x80', + 255, + 'false', + 999.33, + '99999', + '9990000.3647', + 214748.3647 + ); + +INSERT + INTO + dbo.TEST_DATASET + VALUES( + 7, + 0, + '1234567890.1234567', + '1234567890.1234567', + '1999-01-08', + '2079-06-06', + '9999-12-31T13:00:04.123Z', + '9999-12-31T13:00:04.123456Z', + '13:00:04.123456Z', + '9999-01-10 00:00:00 +01:00', + '*', + N'\xF0\x9F\x9A\x80', + 2147483647, + '', + N'ї', + N'\xF0\x9F\x9A\x80', + 
N'\xF0\x9F\x9A\x80', + CAST( + 'A' AS BINARY(1) + ), + CAST( + 'ABC' AS VARBINARY + ), + geometry::STGeomFromText( + 'LINESTRING (100 100, 20 180, 180 180)', + 0 + ), + '375CFC44-CAE3-4E43-8083-821D2DF0E626', + '', + geography::STGeomFromText( + 'LINESTRING(-122.360 47.656, -122.343 47.656 )', + 4326 + ), + 32767, + '/1/1/', + N'\xF0\x9F\x9A\x80', + 255, + 'false', + 999.33, + '99999', + '9990000.3647', + 214748.3647 + ); diff --git a/airbyte-integrations/connectors/source-mssql-v1/metadata.yaml b/airbyte-integrations/connectors/source-mssql-v1/metadata.yaml new file mode 100644 index 000000000000..62f33acd02e8 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql-v1/metadata.yaml @@ -0,0 +1,66 @@ +data: + ab_internal: + ql: 200 + sl: 100 + allowedHosts: + hosts: + - ${host} + - ${tunnel_method.tunnel_host} + connectorSubtype: database + connectorType: source + definitionId: b5ea17b1-f170-46dc-bc31-cc744ca984c1 + dockerImageTag: 4.1.16 + dockerRepository: airbyte/source-mssql + documentationUrl: https://docs.airbyte.com/integrations/sources/mssql + githubIssueLabel: source-mssql + icon: mssql.svg + license: ELv2 + maxSecondsBetweenMessages: 7200 + name: Microsoft SQL Server (MSSQL) + registryOverrides: + cloud: + enabled: true + oss: + enabled: true + releaseStage: generally_available + supportLevel: certified + tags: + - language:java + releases: + breakingChanges: + 4.0.0: + message: "We have overhauled our MSSQL source connector and it is now supported by the Airbyte team! To benefit from new features, including terabyte-sized table support, reliability improvements, expanded datetime data types, and various bug fixes, please opt in to the 4.0.0 version." + upgradeDeadline: "2024-04-07" + 3.0.0: + message: "Remapped columns of types: date, datetime, datetime2, datetimeoffset, smalldatetime, and time from `String` to their appropriate Airbyte types. 
Customers whose streams have columns with the affected data types must take action with their connections." + upgradeDeadline: "2023-12-07" + 2.0.0: + message: "Add default cursor for cdc" + upgradeDeadline: "2023-08-23" + connectorTestSuitesOptions: + - suite: unitTests + - suite: integrationTests + testSecrets: + - name: SECRET_SOURCE-MSSQL__CREDS + fileName: config.json + secretStore: + type: GSM + alias: airbyte-connector-testing-secret-store + - name: SECRET_SOURCE_MSSQL_PERFORMANCE_TEST_CREDS + fileName: performance-config.json + secretStore: + type: GSM + alias: airbyte-connector-testing-secret-store + - suite: acceptanceTests + testSecrets: + - name: SECRET_SOURCE-MSSQL__CREDS + fileName: config.json + secretStore: + type: GSM + alias: airbyte-connector-testing-secret-store + - name: SECRET_SOURCE_MSSQL_PERFORMANCE_TEST_CREDS + fileName: performance-config.json + secretStore: + type: GSM + alias: airbyte-connector-testing-secret-store +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MsSqlSpecConstants.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MsSqlSpecConstants.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MsSqlSpecConstants.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MsSqlSpecConstants.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcConnectorMetadataInjector.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcConnectorMetadataInjector.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcConnectorMetadataInjector.java rename to 
airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcConnectorMetadataInjector.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcHelper.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcHelper.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcHelper.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcHelper.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcSavedInfoFetcher.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcSavedInfoFetcher.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcSavedInfoFetcher.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcSavedInfoFetcher.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcStateHandler.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcStateHandler.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcStateHandler.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcStateHandler.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcTargetPosition.java 
b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcTargetPosition.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcTargetPosition.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcTargetPosition.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlDebeziumConverter.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlDebeziumConverter.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlDebeziumConverter.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlDebeziumConverter.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlQueryUtils.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlQueryUtils.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlQueryUtils.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlQueryUtils.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java rename to 
airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSourceOperations.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlSourceOperations.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSourceOperations.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/MssqlSourceOperations.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/cdc/MssqlCdcStateConstants.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/cdc/MssqlCdcStateConstants.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/cdc/MssqlCdcStateConstants.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/cdc/MssqlCdcStateConstants.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/cdc/MssqlDebeziumStateUtil.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/cdc/MssqlDebeziumStateUtil.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/cdc/MssqlDebeziumStateUtil.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/cdc/MssqlDebeziumStateUtil.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/cursor_based/MssqlCursorBasedStateManager.java 
b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/cursor_based/MssqlCursorBasedStateManager.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/cursor_based/MssqlCursorBasedStateManager.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/cursor_based/MssqlCursorBasedStateManager.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/initialsync/CdcMetadataInjector.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/initialsync/CdcMetadataInjector.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/initialsync/CdcMetadataInjector.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/initialsync/CdcMetadataInjector.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadGlobalStateManager.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadGlobalStateManager.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadGlobalStateManager.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadGlobalStateManager.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadHandler.java 
b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadHandler.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadHandler.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadHandler.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadRecordIterator.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadRecordIterator.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadRecordIterator.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadRecordIterator.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadStateManager.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadStateManager.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadStateManager.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadStateManager.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadStreamStateManager.java 
b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadStreamStateManager.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadStreamStateManager.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialLoadStreamStateManager.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialReadUtil.java b/airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialReadUtil.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialReadUtil.java rename to airbyte-integrations/connectors/source-mssql-v1/src/main/java/io/airbyte/integrations/source/mssql/initialsync/MssqlInitialReadUtil.java diff --git a/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MSSqlSourceExceptionHandler.kt b/airbyte-integrations/connectors/source-mssql-v1/src/main/kotlin/io/airbyte/integrations/source/mssql/MSSqlSourceExceptionHandler.kt similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MSSqlSourceExceptionHandler.kt rename to airbyte-integrations/connectors/source-mssql-v1/src/main/kotlin/io/airbyte/integrations/source/mssql/MSSqlSourceExceptionHandler.kt diff --git a/airbyte-integrations/connectors/source-mssql/src/main/resources/spec.json b/airbyte-integrations/connectors/source-mssql-v1/src/main/resources/spec.json similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/main/resources/spec.json rename to 
airbyte-integrations/connectors/source-mssql-v1/src/main/resources/spec.json diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractMssqlSourceDatatypeTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractMssqlSourceDatatypeTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractMssqlSourceDatatypeTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractMssqlSourceDatatypeTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractSshMssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractSshMssqlSourceAcceptanceTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractSshMssqlSourceAcceptanceTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractSshMssqlSourceAcceptanceTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceAcceptanceTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceAcceptanceTest.java rename to 
airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceAcceptanceTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceDatatypeTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceDatatypeTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceDatatypeTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceDatatypeTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CloudDeploymentSslEnabledMssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/CloudDeploymentSslEnabledMssqlSourceAcceptanceTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CloudDeploymentSslEnabledMssqlSourceAcceptanceTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/CloudDeploymentSslEnabledMssqlSourceAcceptanceTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceAcceptanceTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceAcceptanceTest.java rename to 
airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceAcceptanceTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceDatatypeTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceDatatypeTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceDatatypeTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceDatatypeTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceOperationsTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceOperationsTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceOperationsTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceOperationsTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/SshKeyMssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/SshKeyMssqlSourceAcceptanceTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/SshKeyMssqlSourceAcceptanceTest.java rename to 
airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/SshKeyMssqlSourceAcceptanceTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/SshPasswordMssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/SshPasswordMssqlSourceAcceptanceTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/SshPasswordMssqlSourceAcceptanceTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/SshPasswordMssqlSourceAcceptanceTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/SslEnabledMssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/SslEnabledMssqlSourceAcceptanceTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/SslEnabledMssqlSourceAcceptanceTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test-integration/java/io/airbyte/integrations/source/mssql/SslEnabledMssqlSourceAcceptanceTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/resources/dummy_config.json b/airbyte-integrations/connectors/source-mssql-v1/src/test-integration/resources/dummy_config.json similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-integration/resources/dummy_config.json rename to airbyte-integrations/connectors/source-mssql-v1/src/test-integration/resources/dummy_config.json diff --git 
a/airbyte-integrations/connectors/source-mssql/src/test-integration/resources/expected_spec.json b/airbyte-integrations/connectors/source-mssql-v1/src/test-integration/resources/expected_spec.json similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-integration/resources/expected_spec.json rename to airbyte-integrations/connectors/source-mssql-v1/src/test-integration/resources/expected_spec.json diff --git a/airbyte-integrations/connectors/source-mssql/src/test-performance/java/io/airbyte/integrations/source/mssql/FillMsSqlTestDbScriptTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test-performance/java/io/airbyte/integrations/source/mssql/FillMsSqlTestDbScriptTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-performance/java/io/airbyte/integrations/source/mssql/FillMsSqlTestDbScriptTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test-performance/java/io/airbyte/integrations/source/mssql/FillMsSqlTestDbScriptTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test-performance/java/io/airbyte/integrations/source/mssql/MssqlSourcePerformanceTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test-performance/java/io/airbyte/integrations/source/mssql/MssqlSourcePerformanceTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test-performance/java/io/airbyte/integrations/source/mssql/MssqlSourcePerformanceTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test-performance/java/io/airbyte/integrations/source/mssql/MssqlSourcePerformanceTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test-performance/sql/create_mssql_benchmarks.sql b/airbyte-integrations/connectors/source-mssql-v1/src/test-performance/sql/create_mssql_benchmarks.sql similarity index 100% rename from 
airbyte-integrations/connectors/source-mssql/src/test-performance/sql/create_mssql_benchmarks.sql rename to airbyte-integrations/connectors/source-mssql-v1/src/test-performance/sql/create_mssql_benchmarks.sql diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSslSourceTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSslSourceTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSslSourceTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSslSourceTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CdcStateCompressionTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/CdcStateCompressionTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CdcStateCompressionTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/CdcStateCompressionTest.java diff --git 
a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CloudDeploymentMssqlTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/CloudDeploymentMssqlTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CloudDeploymentMssqlTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/CloudDeploymentMssqlTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlAgentStateTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlAgentStateTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlAgentStateTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlAgentStateTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlCdcHelperTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlCdcHelperTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlCdcHelperTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlCdcHelperTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlDataSourceFactoryTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlDataSourceFactoryTest.java similarity index 100% rename from 
airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlDataSourceFactoryTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlDataSourceFactoryTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlDebeziumStateUtilTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlDebeziumStateUtilTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlDebeziumStateUtilTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlDebeziumStateUtilTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlInitialLoadHandlerTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlInitialLoadHandlerTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlInitialLoadHandlerTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlInitialLoadHandlerTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlJdbcSourceAcceptanceTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlJdbcSourceAcceptanceTest.java rename to 
airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlJdbcSourceAcceptanceTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlSourceTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlSourceTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlSourceTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlSourceTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlSslSourceTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlSslSourceTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlSslSourceTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlSslSourceTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlStressTest.java b/airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlStressTest.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlStressTest.java rename to airbyte-integrations/connectors/source-mssql-v1/src/test/java/io/airbyte/integrations/source/mssql/MssqlStressTest.java diff --git a/airbyte-integrations/connectors/source-mssql/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLContainerFactory.java 
b/airbyte-integrations/connectors/source-mssql-v1/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLContainerFactory.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLContainerFactory.java rename to airbyte-integrations/connectors/source-mssql-v1/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLContainerFactory.java diff --git a/airbyte-integrations/connectors/source-mssql/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLTestDatabase.java b/airbyte-integrations/connectors/source-mssql-v1/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLTestDatabase.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLTestDatabase.java rename to airbyte-integrations/connectors/source-mssql-v1/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLTestDatabase.java diff --git a/airbyte-integrations/connectors/source-mssql/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSqlTestDatabaseWithBackgroundThreads.java b/airbyte-integrations/connectors/source-mssql-v1/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSqlTestDatabaseWithBackgroundThreads.java similarity index 100% rename from airbyte-integrations/connectors/source-mssql/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSqlTestDatabaseWithBackgroundThreads.java rename to airbyte-integrations/connectors/source-mssql-v1/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSqlTestDatabaseWithBackgroundThreads.java diff --git a/airbyte-integrations/connectors/source-mssql/build.gradle b/airbyte-integrations/connectors/source-mssql/build.gradle index 14b581fdd24d..1c1e57ce977a 100644 --- a/airbyte-integrations/connectors/source-mssql/build.gradle +++ b/airbyte-integrations/connectors/source-mssql/build.gradle @@ -1,26 +1,15 @@ plugins 
{ - id 'airbyte-java-connector' + id 'airbyte-bulk-connector' } -airbyteJavaConnector { - cdkVersionRequired = '0.45.1' - features = ['db-sources'] - useLocalCdk = false -} - -java { - // TODO: rewrite code to avoid javac warnings in the first place - compileJava { - options.compilerArgs += "-Xlint:-try,-rawtypes" - } - compileTestFixturesJava { - options.compilerArgs += "-Xlint:-this-escape" - } +application { + mainClass = 'io.airbyte.integrations.source.mssql.MsSqlServerSource' } -application { - mainClass = 'io.airbyte.integrations.source.mssql.MssqlSource' - applicationDefaultJvmArgs = ['-XX:+ExitOnOutOfMemoryError', '-XX:MaxRAMPercentage=75.0'] +airbyteBulkConnector { + core = 'extract' + toolkits = ['extract-jdbc', 'extract-cdc'] + cdk = 'local' } dependencies { @@ -28,12 +17,15 @@ dependencies { implementation 'io.debezium:debezium-embedded:2.7.1.Final' implementation 'io.debezium:debezium-connector-sqlserver:2.6.2.Final' implementation 'org.codehaus.plexus:plexus-utils:3.4.2' + api 'org.apache.commons:commons-lang3:3.17.0' + implementation 'org.apache.commons:commons-lang3:3.17.0' testFixturesImplementation 'org.testcontainers:mssqlserver:1.19.0' testImplementation 'org.awaitility:awaitility:4.2.0' testImplementation 'org.hamcrest:hamcrest-all:1.3' testImplementation 'org.testcontainers:mssqlserver:1.19.0' + testImplementation("io.mockk:mockk:1.12.0") } compileKotlin { diff --git a/airbyte-integrations/connectors/source-mssql/metadata.yaml b/airbyte-integrations/connectors/source-mssql/metadata.yaml index 62f33acd02e8..c827d349b987 100644 --- a/airbyte-integrations/connectors/source-mssql/metadata.yaml +++ b/airbyte-integrations/connectors/source-mssql/metadata.yaml @@ -39,18 +39,6 @@ data: upgradeDeadline: "2023-08-23" connectorTestSuitesOptions: - suite: unitTests - - suite: integrationTests - testSecrets: - - name: SECRET_SOURCE-MSSQL__CREDS - fileName: config.json - secretStore: - type: GSM - alias: airbyte-connector-testing-secret-store - - name: 
SECRET_SOURCE_MSSQL_PERFORMANCE_TEST_CREDS - fileName: performance-config.json - secretStore: - type: GSM - alias: airbyte-connector-testing-secret-store - suite: acceptanceTests testSecrets: - name: SECRET_SOURCE-MSSQL__CREDS
diff --git a/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerCdcInitialSnapshotStateValue.kt b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerCdcInitialSnapshotStateValue.kt
new file mode 100644
index 000000000000..7cc5722a0fb7
--- /dev/null
+++ b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerCdcInitialSnapshotStateValue.kt
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.source.mssql
+
+import com.fasterxml.jackson.annotation.JsonProperty
+import com.fasterxml.jackson.databind.JsonNode
+import io.airbyte.cdk.command.OpaqueStateValue
+import io.airbyte.cdk.discover.Field
+import io.airbyte.cdk.read.Stream
+import io.airbyte.cdk.util.Jsons
+
+/**
+ * Per-stream state value written by the CDC snapshot partitions.
+ *
+ * All properties are optional and are serialized under the snake_case names given by their
+ * [JsonProperty] annotations.
+ */
+data class MsSqlServerCdcInitialSnapshotStateValue(
+    @JsonProperty("pk_val") val pkVal: String? = null,
+    @JsonProperty("pk_name") val pkName: String? = null,
+    @JsonProperty("version") val version: Int? = null,
+    @JsonProperty("state_type") val stateType: String? = null,
+    @JsonProperty("incremental_state") val incrementalState: JsonNode? = null,
+    @JsonProperty("stream_name") val streamName: String? = null,
+    @JsonProperty("cursor_field") val cursorField: List<String>? = null,
+    @JsonProperty("stream_namespace") val streamNamespace: String? = null,
+) {
+    companion object {
+        /** Value representing the completion of a FULL_REFRESH snapshot. */
+        fun getSnapshotCompletedState(stream: Stream): OpaqueStateValue =
+            Jsons.valueToTree(
+                MsSqlServerCdcInitialSnapshotStateValue(
+                    streamName = stream.name,
+                    cursorField = listOf(),
+                    streamNamespace = stream.namespace
+                )
+            )
+
+        /**
+         * Value representing the progress of an ongoing snapshot.
+         *
+         * Only the first column of [primaryKey] and the first value of [primaryKeyCheckpoint] are
+         * recorded; both lists must be non-empty.
+         */
+        fun snapshotCheckpoint(
+            primaryKey: List<Field>,
+            primaryKeyCheckpoint: List<JsonNode>,
+        ): OpaqueStateValue {
+            val primaryKeyField = primaryKey.first()
+            return Jsons.valueToTree(
+                MsSqlServerCdcInitialSnapshotStateValue(
+                    pkName = primaryKeyField.id,
+                    pkVal = primaryKeyCheckpoint.first().asText(),
+                    stateType = "primary_key",
+                )
+            )
+        }
+    }
+}
diff --git a/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerDebeziumOperations.kt b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerDebeziumOperations.kt
new file mode 100644
index 000000000000..6f807ac3fd06
--- /dev/null
+++ b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerDebeziumOperations.kt
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.source.mssql
+
+import io.airbyte.cdk.command.OpaqueStateValue
+import io.airbyte.cdk.read.Stream
+import io.airbyte.cdk.read.cdc.*
+import io.airbyte.cdk.util.Jsons
+import jakarta.inject.Singleton
+import org.apache.kafka.connect.source.SourceRecord
+
+/**
+ * Placeholder [DebeziumOperations] implementation for SQL Server.
+ *
+ * Every method returns a fixed value (a fresh position, an empty synthetic input, null, or an
+ * empty JSON object) and ignores its arguments.
+ */
+@Singleton
+class MsSqlServerDebeziumOperations : DebeziumOperations<MsSqlServerDebeziumPosition> {
+    override fun position(offset: DebeziumOffset): MsSqlServerDebeziumPosition {
+        return MsSqlServerDebeziumPosition()
+    }
+
+    override fun position(recordValue: DebeziumRecordValue): MsSqlServerDebeziumPosition? {
+        return MsSqlServerDebeziumPosition()
+    }
+
+    override fun position(sourceRecord: SourceRecord): MsSqlServerDebeziumPosition? {
+        return MsSqlServerDebeziumPosition()
+    }
+
+    override fun synthesize(): DebeziumInput {
+        return DebeziumInput(
+            isSynthetic = true,
+            state = DebeziumState(DebeziumOffset(emptyMap()), null),
+            properties = emptyMap()
+        )
+    }
+
+    override fun deserialize(
+        opaqueStateValue: OpaqueStateValue,
+        streams: List<Stream>
+    ): DebeziumInput {
+        return DebeziumInput(
+            isSynthetic = true,
+            state = DebeziumState(DebeziumOffset(emptyMap()), null),
+            properties = emptyMap()
+        )
+    }
+
+    override fun deserialize(
+        key: DebeziumRecordKey,
+        value: DebeziumRecordValue
+    ): DeserializedRecord? {
+        return null
+    }
+
+    override fun serialize(debeziumState: DebeziumState): OpaqueStateValue {
+        return Jsons.objectNode()
+    }
+}
+
+/** Placeholder position type: [compareTo] always returns 0, so all positions compare as equal. */
+class MsSqlServerDebeziumPosition : Comparable<MsSqlServerDebeziumPosition> {
+    override fun compareTo(other: MsSqlServerDebeziumPosition): Int {
+        return 0
+    }
+}
diff --git a/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerFieldTypeMapper.kt b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerFieldTypeMapper.kt
new file mode 100644
index 000000000000..57788cce7e54
--- /dev/null
+++ b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerFieldTypeMapper.kt
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.source.mssql
+
+import io.airbyte.cdk.discover.FieldType
+import io.airbyte.cdk.discover.JdbcMetadataQuerier
+import io.airbyte.cdk.discover.SystemType
+import io.airbyte.cdk.jdbc.*
+import io.github.oshai.kotlinlogging.KotlinLogging
+import io.micronaut.context.annotation.Primary
+import jakarta.inject.Singleton
+import java.sql.JDBCType
+
+private val log = KotlinLogging.logger {}
+
+/**
+ * Maps SQL Server column metadata to CDK field types: a match on the SQL Server type name in
+ * [MsSqlServerSqlType] wins, otherwise the generic JDBC type decides.
+ */
+@Singleton
+@Primary
+class MsSqlServerFieldTypeMapper : JdbcMetadataQuerier.FieldTypeMapper {
+    override fun toFieldType(c: JdbcMetadataQuerier.ColumnMetadata): FieldType =
+        when (val type = c.type) {
+            is SystemType -> leafType(type)
+            else -> PokemonFieldType
+        }
+
+    // NOTE(review): FLOAT -> FloatFieldType but REAL -> DoubleFieldType, and DECIMAL ->
+    // BigIntegerFieldType but NUMERIC -> DoubleFieldType; these look inconsistent — confirm.
+    private fun leafType(type: SystemType): JdbcFieldType<*> {
+        return MsSqlServerSqlType.fromName(type.typeName)?.jdbcType
+            ?: when (type.jdbcType) {
+                JDBCType.BIT -> BooleanFieldType
+                JDBCType.TINYINT -> ShortFieldType
+                JDBCType.SMALLINT -> ShortFieldType
+                JDBCType.INTEGER -> IntFieldType
+                JDBCType.BIGINT -> BigIntegerFieldType
+                JDBCType.FLOAT -> FloatFieldType
+                JDBCType.REAL -> DoubleFieldType
+                JDBCType.DOUBLE -> DoubleFieldType
+                JDBCType.NUMERIC -> DoubleFieldType
+                JDBCType.DECIMAL -> BigIntegerFieldType
+                JDBCType.CHAR -> StringFieldType
+                JDBCType.VARCHAR -> StringFieldType
+                JDBCType.LONGVARCHAR -> StringFieldType
+                JDBCType.DATE -> LocalDateFieldType
+                JDBCType.TIME -> LocalTimeFieldType
+                JDBCType.TIMESTAMP -> LocalDateTimeFieldType
+                JDBCType.BINARY -> BytesFieldType
+                JDBCType.VARBINARY -> BytesFieldType
+                JDBCType.LONGVARBINARY -> BytesFieldType
+                JDBCType.NULL -> NullFieldType
+                JDBCType.OTHER -> PokemonFieldType
+                JDBCType.JAVA_OBJECT -> PokemonFieldType
+                JDBCType.DISTINCT -> PokemonFieldType
+                JDBCType.STRUCT -> PokemonFieldType
+                JDBCType.ARRAY -> PokemonFieldType
+                JDBCType.BLOB -> BinaryStreamFieldType
+                JDBCType.CLOB -> CharacterStreamFieldType
+                JDBCType.REF -> PokemonFieldType
+                JDBCType.DATALINK -> PokemonFieldType
+                JDBCType.BOOLEAN -> BooleanFieldType
+                JDBCType.ROWID -> PokemonFieldType
+                JDBCType.NCHAR -> StringFieldType
+                JDBCType.NVARCHAR -> StringFieldType
+                JDBCType.LONGNVARCHAR -> StringFieldType
+                JDBCType.NCLOB -> CharacterStreamFieldType
+                JDBCType.SQLXML -> PokemonFieldType
+                JDBCType.REF_CURSOR -> PokemonFieldType
+                JDBCType.TIME_WITH_TIMEZONE -> OffsetTimeFieldType
+                JDBCType.TIMESTAMP_WITH_TIMEZONE -> OffsetDateTimeFieldType
+                null -> PokemonFieldType
+            }
+    }
+
+    enum class MsSqlServerSqlType(val names: List<String>, val jdbcType: JdbcFieldType<*>) {
+        BINARY(BinaryStreamFieldType, "VARBINARY", "BINARY"),
+        DATETIME_TYPES(LocalDateTimeFieldType, "DATETIME", "DATETIME2", "SMALLDATETIME"),
+        DATE(LocalDateFieldType, "DATE"),
+        DATETIMEOFFSET(OffsetDateTimeFieldType, "DATETIMEOFFSET"),
+        TIME_TYPE(LocalTimeFieldType, "TIME"),
+        SMALLMONEY_TYPE(PokemonFieldType, "SMALLMONEY"),
+        GEOMETRY(PokemonFieldType, "GEOMETRY"),
+        GEOGRAPHY(PokemonFieldType, "GEOGRAPHY");
+
+        constructor(
+            jdbcType: JdbcFieldType<*>,
+            vararg names: String
+        ) : this(names.toList(), jdbcType)
+
+        companion object {
+            private val nameToValue =
+                MsSqlServerSqlType.entries
+                    .flatMap { msSqlServerSqlType ->
+                        msSqlServerSqlType.names.map { name -> name to msSqlServerSqlType }
+                    }
+                    .toMap()
+
+            // Exact, case-sensitive lookup against the upper-case names declared above.
+            // NOTE(review): if the driver reports lower-case type names nothing here will
+            // match — confirm the case of SystemType.typeName.
+            fun fromName(name: String?): MsSqlServerSqlType? = nameToValue[name]
+        }
+    }
+
+    companion object {
+        val DATETIME_FORMAT_MICROSECONDS = "yyyy-MM-dd'T'HH:mm:ss[.][SSSSSS]"
+    }
+}
diff --git a/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerJdbcPartition.kt b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerJdbcPartition.kt
new file mode 100644
index 000000000000..1b3d9562cd34
--- /dev/null
+++ b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerJdbcPartition.kt
@@ -0,0 +1,351 @@
+/*
+ * Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.source.mssql
+
+import com.fasterxml.jackson.databind.JsonNode
+import com.fasterxml.jackson.databind.node.ObjectNode
+import io.airbyte.cdk.command.OpaqueStateValue
+import io.airbyte.cdk.discover.Field
+import io.airbyte.cdk.read.And
+import io.airbyte.cdk.read.DefaultJdbcStreamState
+import io.airbyte.cdk.read.Equal
+import io.airbyte.cdk.read.From
+import io.airbyte.cdk.read.FromSample
+import io.airbyte.cdk.read.Greater
+import io.airbyte.cdk.read.GreaterOrEqual
+import io.airbyte.cdk.read.JdbcCursorPartition
+import io.airbyte.cdk.read.JdbcPartition
+import io.airbyte.cdk.read.JdbcSplittablePartition
+import io.airbyte.cdk.read.Lesser
+import io.airbyte.cdk.read.LesserOrEqual
+import io.airbyte.cdk.read.Limit
+import io.airbyte.cdk.read.NoWhere
+import io.airbyte.cdk.read.Or
+import io.airbyte.cdk.read.OrderBy
+import io.airbyte.cdk.read.SelectColumnMaxValue
+import io.airbyte.cdk.read.SelectColumns
+import io.airbyte.cdk.read.SelectQuery
+import io.airbyte.cdk.read.SelectQueryGenerator
+import io.airbyte.cdk.read.SelectQuerySpec
+import io.airbyte.cdk.read.Stream
+import io.airbyte.cdk.read.Where
+import io.airbyte.cdk.read.WhereClauseLeafNode
+import io.airbyte.cdk.read.WhereClauseNode
+import
io.airbyte.cdk.read.optimize
+import io.airbyte.cdk.util.Jsons
+
+/**
+ * Base class of every SQL Server [JdbcPartition]. On its own it offers the non-resumable query
+ * and a sampling query that ignores the sampling rate and only applies a LIMIT.
+ */
+sealed class MsSqlServerJdbcPartition(
+    val selectQueryGenerator: SelectQueryGenerator,
+    streamState: DefaultJdbcStreamState,
+) : JdbcPartition<DefaultJdbcStreamState> {
+    val stream: Stream = streamState.stream
+    val from = From(stream.name, stream.namespace)
+
+    override val nonResumableQuery: SelectQuery
+        get() = selectQueryGenerator.generate(nonResumableQuerySpec.optimize())
+
+    open val nonResumableQuerySpec = SelectQuerySpec(SelectColumns(stream.fields), from)
+
+    override fun samplingQuery(sampleRateInvPow2: Int): SelectQuery {
+        val sampleSize: Int = streamState.sharedState.maxSampleSize
+        val querySpec =
+            SelectQuerySpec(SelectColumns(stream.fields), from, limit = Limit(sampleSize.toLong()))
+        return selectQueryGenerator.generate(querySpec.optimize())
+    }
+}
+
+/** Default implementation of a [JdbcPartition] for an unsplittable snapshot partition. */
+class MsSqlServerJdbcNonResumableSnapshotPartition(
+    selectQueryGenerator: SelectQueryGenerator,
+    override val streamState: DefaultJdbcStreamState,
+) : MsSqlServerJdbcPartition(selectQueryGenerator, streamState) {
+
+    override val completeState: OpaqueStateValue = MsSqlServerJdbcStreamStateValue.snapshotCompleted
+}
+
+/**
+ * Default implementation of a [JdbcPartition] for a non-resumable snapshot partition preceding a
+ * cursor-based incremental sync.
+ */
+class MsSqlServerJdbcNonResumableSnapshotWithCursorPartition(
+    selectQueryGenerator: SelectQueryGenerator,
+    override val streamState: DefaultJdbcStreamState,
+    val cursor: Field,
+) :
+    MsSqlServerJdbcPartition(selectQueryGenerator, streamState),
+    JdbcCursorPartition<DefaultJdbcStreamState> {
+
+    override val completeState: OpaqueStateValue
+        get() =
+            MsSqlServerJdbcStreamStateValue.cursorIncrementalCheckpoint(
+                cursor,
+                cursorCheckpoint = streamState.cursorUpperBound!!,
+                streamState.stream,
+            )
+
+    override val cursorUpperBoundQuery: SelectQuery
+        get() = selectQueryGenerator.generate(cursorUpperBoundQuerySpec.optimize())
+
+    val cursorUpperBoundQuerySpec = SelectQuerySpec(SelectColumnMaxValue(cursor), from)
+}
+
+/** Base class for resumable partitions, bounded and checkpointed on [checkpointColumns]. */
+sealed class MsSqlServerJdbcResumablePartition(
+    selectQueryGenerator: SelectQueryGenerator,
+    streamState: DefaultJdbcStreamState,
+    val checkpointColumns: List<Field>,
+) :
+    MsSqlServerJdbcPartition(selectQueryGenerator, streamState),
+    JdbcSplittablePartition<DefaultJdbcStreamState> {
+    abstract val lowerBound: List<JsonNode>?
+    abstract val upperBound: List<JsonNode>?
+
+    override val nonResumableQuerySpec: SelectQuerySpec
+        get() = SelectQuerySpec(SelectColumns(stream.fields), from, where)
+
+    override fun resumableQuery(limit: Long): SelectQuery {
+        val querySpec =
+            SelectQuerySpec(
+                SelectColumns((stream.fields + checkpointColumns).distinct()),
+                from,
+                where,
+                OrderBy(checkpointColumns),
+                Limit(limit),
+            )
+        return selectQueryGenerator.generate(querySpec.optimize())
+    }
+
+    override fun samplingQuery(sampleRateInvPow2: Int): SelectQuery {
+        val sampleSize: Int = streamState.sharedState.maxSampleSize
+        val querySpec =
+            SelectQuerySpec(
+                SelectColumns(stream.fields + checkpointColumns),
+                FromSample(stream.name, stream.namespace, sampleRateInvPow2, sampleSize),
+                NoWhere,
+                OrderBy(checkpointColumns),
+                Limit(sampleSize.toLong())
+            )
+        return selectQueryGenerator.generate(querySpec.optimize())
+    }
+
+    // Row-wise range over checkpointColumns: strictly above lowerBound (the last column uses >=
+    // when isLowerBoundIncluded) and at or below upperBound. Each bound becomes an OR of
+    // "earlier columns equal AND this column compares"; a null bound yields an empty OR.
+    val where: Where
+        get() {
+            val zippedLowerBound: List<Pair<Field, JsonNode>> =
+                lowerBound?.let { checkpointColumns.zip(it) } ?: listOf()
+            val lowerBoundDisj: List<WhereClauseNode> =
+                zippedLowerBound.mapIndexed { idx: Int, (gtCol: Field, gtValue: JsonNode) ->
+                    val lastLeaf: WhereClauseLeafNode =
+                        if (isLowerBoundIncluded && idx == checkpointColumns.size - 1) {
+                            GreaterOrEqual(gtCol, gtValue)
+                        } else {
+                            Greater(gtCol, gtValue)
+                        }
+                    And(
+                        zippedLowerBound.take(idx).map { (eqCol: Field, eqValue: JsonNode) ->
+                            Equal(eqCol, eqValue)
+                        } + listOf(lastLeaf),
+                    )
+                }
+            val zippedUpperBound: List<Pair<Field, JsonNode>> =
+                upperBound?.let { checkpointColumns.zip(it) } ?: listOf()
+            val upperBoundDisj: List<WhereClauseNode> =
+                zippedUpperBound.mapIndexed { idx: Int, (leqCol: Field, leqValue: JsonNode) ->
+                    val lastLeaf: WhereClauseLeafNode =
+                        if (idx < zippedUpperBound.size - 1) {
+                            Lesser(leqCol, leqValue)
+                        } else {
+                            LesserOrEqual(leqCol, leqValue)
+                        }
+                    And(
+                        zippedUpperBound.take(idx).map { (eqCol: Field, eqValue: JsonNode) ->
+                            Equal(eqCol, eqValue)
+                        } + listOf(lastLeaf),
+                    )
+                }
+            return Where(And(Or(lowerBoundDisj), Or(upperBoundDisj)))
+        }
+
+    open val isLowerBoundIncluded: Boolean = false
+}
+
+/** Resumable full refresh (RFR) snapshot partition checkpointed with the JDBC stream state. */
+class MsSqlServerJdbcRfrSnapshotPartition(
+    selectQueryGenerator: SelectQueryGenerator,
+    override val streamState: DefaultJdbcStreamState,
+    primaryKey: List<Field>,
+    override val lowerBound: List<JsonNode>?,
+    override val upperBound: List<JsonNode>?,
+) : MsSqlServerJdbcResumablePartition(selectQueryGenerator, streamState, primaryKey) {
+
+    // TODO: this needs to reflect lastRecord. Complete state needs to have last primary key value
+    // in RFR case.
+    override val completeState: OpaqueStateValue
+        get() =
+            MsSqlServerJdbcStreamStateValue.snapshotCheckpoint(
+                primaryKey = checkpointColumns,
+                primaryKeyCheckpoint =
+                    checkpointColumns.map { upperBound?.get(0) ?: Jsons.nullNode() },
+            )
+
+    override fun incompleteState(lastRecord: ObjectNode): OpaqueStateValue =
+        MsSqlServerJdbcStreamStateValue.snapshotCheckpoint(
+            primaryKey = checkpointColumns,
+            primaryKeyCheckpoint = checkpointColumns.map { lastRecord[it.id] ?: Jsons.nullNode() },
+        )
+}
+
+/** RFR for CDC.
*/
+class MsSqlServerJdbcCdcRfrSnapshotPartition(
+    selectQueryGenerator: SelectQueryGenerator,
+    override val streamState: DefaultJdbcStreamState,
+    primaryKey: List<Field>,
+    override val lowerBound: List<JsonNode>?,
+    override val upperBound: List<JsonNode>?,
+) : MsSqlServerJdbcResumablePartition(selectQueryGenerator, streamState, primaryKey) {
+    // Only the first upper-bound value is used; it is repeated for every checkpoint column.
+    override val completeState: OpaqueStateValue
+        get() =
+            MsSqlServerCdcInitialSnapshotStateValue.snapshotCheckpoint(
+                primaryKey = checkpointColumns,
+                primaryKeyCheckpoint =
+                    checkpointColumns.map { upperBound?.get(0) ?: Jsons.nullNode() },
+            )
+
+    override fun incompleteState(lastRecord: ObjectNode): OpaqueStateValue =
+        MsSqlServerCdcInitialSnapshotStateValue.snapshotCheckpoint(
+            primaryKey = checkpointColumns,
+            primaryKeyCheckpoint = checkpointColumns.map { lastRecord[it.id] ?: Jsons.nullNode() },
+        )
+}
+
+/**
+ * Implementation of a [JdbcPartition] for a CDC snapshot partition. Used for incremental CDC
+ * initial sync.
+ */
+class MsSqlServerJdbcCdcSnapshotPartition(
+    selectQueryGenerator: SelectQueryGenerator,
+    override val streamState: DefaultJdbcStreamState,
+    primaryKey: List<Field>,
+    override val lowerBound: List<JsonNode>?
+) : MsSqlServerJdbcResumablePartition(selectQueryGenerator, streamState, primaryKey) {
+    override val upperBound: List<JsonNode>? = null
+    override val completeState: OpaqueStateValue
+        get() = MsSqlServerCdcInitialSnapshotStateValue.getSnapshotCompletedState(stream)
+
+    override fun incompleteState(lastRecord: ObjectNode): OpaqueStateValue =
+        MsSqlServerCdcInitialSnapshotStateValue.snapshotCheckpoint(
+            primaryKey = checkpointColumns,
+            primaryKeyCheckpoint = checkpointColumns.map { lastRecord[it.id] ?: Jsons.nullNode() },
+        )
+}
+
+/**
+ * Default implementation of a [JdbcPartition] for a splittable partition involving cursor columns.
+ */
+sealed class MsSqlServerJdbcCursorPartition(
+    selectQueryGenerator: SelectQueryGenerator,
+    streamState: DefaultJdbcStreamState,
+    checkpointColumns: List<Field>,
+    val cursor: Field,
+    private val explicitCursorUpperBound: JsonNode?,
+) :
+    MsSqlServerJdbcResumablePartition(selectQueryGenerator, streamState, checkpointColumns),
+    JdbcCursorPartition<DefaultJdbcStreamState> {
+    // Falls back to the upper bound held by the stream state; throws if neither one is set.
+    val cursorUpperBound: JsonNode
+        get() = explicitCursorUpperBound ?: streamState.cursorUpperBound!!
+
+    override val cursorUpperBoundQuery: SelectQuery
+        get() = selectQueryGenerator.generate(cursorUpperBoundQuerySpec.optimize())
+
+    val cursorUpperBoundQuerySpec = SelectQuerySpec(SelectColumnMaxValue(cursor), from)
+}
+
+/**
+ * Default implementation of a [JdbcPartition] for a splittable snapshot partition preceding a
+ * cursor-based incremental sync.
+ */
+class MsSqlServerJdbcSnapshotWithCursorPartition(
+    selectQueryGenerator: SelectQueryGenerator,
+    override val streamState: DefaultJdbcStreamState,
+    primaryKey: List<Field>,
+    override val lowerBound: List<JsonNode>?,
+    cursor: Field,
+    cursorUpperBound: JsonNode?,
+) :
+    MsSqlServerJdbcCursorPartition(
+        selectQueryGenerator,
+        streamState,
+        primaryKey,
+        cursor,
+        cursorUpperBound
+    ) {
+    // UpperBound is not used because the partition is not splittable.
+    override val upperBound: List<JsonNode>? = null
+
+    override val completeState: OpaqueStateValue
+        get() =
+            MsSqlServerJdbcStreamStateValue.cursorIncrementalCheckpoint(
+                cursor,
+                cursorUpperBound,
+                stream,
+            )
+
+    override fun incompleteState(lastRecord: ObjectNode): OpaqueStateValue =
+        MsSqlServerJdbcStreamStateValue.snapshotWithCursorCheckpoint(
+            primaryKey = checkpointColumns,
+            primaryKeyCheckpoint = checkpointColumns.map { lastRecord[it.id] ?: Jsons.nullNode() },
+            cursor,
+            stream,
+        )
+}
+
+/**
+ * Default implementation of a [JdbcPartition] for a cursor incremental partition. These are always
+ * splittable.
+ */
+class MsSqlServerJdbcCursorIncrementalPartition(
+    selectQueryGenerator: SelectQueryGenerator,
+    override val streamState: DefaultJdbcStreamState,
+    cursor: Field,
+    val cursorLowerBound: JsonNode,
+    override val isLowerBoundIncluded: Boolean,
+    cursorUpperBound: JsonNode?,
+) :
+    MsSqlServerJdbcCursorPartition(
+        selectQueryGenerator,
+        streamState,
+        listOf(cursor),
+        cursor,
+        cursorUpperBound
+    ) {
+    // The cursor is the only checkpoint column, so both bounds are single-element lists.
+    override val lowerBound: List<JsonNode> = listOf(cursorLowerBound)
+    override val upperBound: List<JsonNode>
+        get() = listOf(cursorUpperBound)
+
+    override val completeState: OpaqueStateValue
+        get() =
+            MsSqlServerJdbcStreamStateValue.cursorIncrementalCheckpoint(
+                cursor,
+                cursorCheckpoint = cursorUpperBound,
+                stream,
+            )
+
+    override fun incompleteState(lastRecord: ObjectNode): OpaqueStateValue =
+        MsSqlServerJdbcStreamStateValue.cursorIncrementalCheckpoint(
+            cursor,
+            cursorCheckpoint = lastRecord[cursor.id] ?: Jsons.nullNode(),
+            stream,
+        )
+}
diff --git a/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerJdbcPartitionFactory.kt b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerJdbcPartitionFactory.kt
new file mode 100644
index 000000000000..7bd9d766c808
--- /dev/null
+++ b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerJdbcPartitionFactory.kt
@@ -0,0 +1,358 @@
+/*
+ * Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+ */
+
+// NOTE(review): this file sits under .../source/mssql/ but declares the `mysql` package and has
+// to star-import the mssql package below. Left untouched because the package is part of the
+// class's fully-qualified name.
+package io.airbyte.integrations.source.mysql
+
+import com.fasterxml.jackson.databind.JsonNode
+import com.fasterxml.jackson.databind.node.BinaryNode
+import io.airbyte.cdk.ConfigErrorException
+import io.airbyte.cdk.StreamIdentifier
+import io.airbyte.cdk.command.OpaqueStateValue
+import io.airbyte.cdk.data.LeafAirbyteSchemaType
+import io.airbyte.cdk.data.LocalDateTimeCodec
+import io.airbyte.cdk.data.OffsetDateTimeCodec
+import io.airbyte.cdk.discover.Field
+import io.airbyte.cdk.jdbc.JdbcConnectionFactory
+import io.airbyte.cdk.jdbc.JdbcFieldType
+import io.airbyte.cdk.read.ConfiguredSyncMode
+import io.airbyte.cdk.read.DefaultJdbcSharedState
+import io.airbyte.cdk.read.DefaultJdbcStreamState
+import io.airbyte.cdk.read.From
+import io.airbyte.cdk.read.JdbcPartitionFactory
+import io.airbyte.cdk.read.SelectColumnMaxValue
+import io.airbyte.cdk.read.SelectQuerySpec
+import io.airbyte.cdk.read.Stream
+import io.airbyte.cdk.read.StreamFeedBootstrap
+import io.airbyte.cdk.util.Jsons
+import io.airbyte.integrations.source.mssql.*
+import io.micronaut.context.annotation.Primary
+import java.time.LocalDateTime
+import java.time.format.DateTimeFormatter
+import java.time.format.DateTimeParseException
+import java.util.Base64
+import java.util.concurrent.ConcurrentHashMap
+import javax.inject.Singleton
+
+/**
+ * Chooses the JDBC partition to read for a stream, based on the stream's sync mode, its
+ * configured primary key and cursor, whether the connector runs in global (CDC) mode, and the
+ * state saved by a previous sync (if any).
+ */
+@Primary
+@Singleton
+class MsSqlServerJdbcPartitionFactory(
+    override val sharedState: DefaultJdbcSharedState,
+    val selectQueryGenerator: MsSqlServerSelectQueryGenerator,
+    val config: MsSqlServerSourceConfiguration,
+) :
+    JdbcPartitionFactory<
+        DefaultJdbcSharedState,
+        DefaultJdbcStreamState,
+        MsSqlServerJdbcPartition,
+    > {
+
+    // One stream state per feed id, created on first request and reused afterwards.
+    private val streamStates = ConcurrentHashMap<StreamIdentifier, DefaultJdbcStreamState>()
+
+    override fun streamState(streamFeedBootstrap: StreamFeedBootstrap): DefaultJdbcStreamState =
+        streamStates.getOrPut(streamFeedBootstrap.feed.id) {
+            DefaultJdbcStreamState(sharedState, streamFeedBootstrap)
+        }
+
+    /**
+     * Runs a `MAX(...)` query over the first primary-key column of [stream] and returns the
+     * result decoded with that column's [JdbcFieldType]. Returns a JSON null node when the query
+     * yields no row.
+     */
+    private fun findPkUpperBound(stream: Stream, pkChosenFromCatalog: List<Field>): JsonNode {
+        // find upper bound using maxPk query
+        val jdbcConnectionFactory = JdbcConnectionFactory(config)
+        val from = From(stream.name, stream.namespace)
+        val maxPkQuery = SelectQuerySpec(SelectColumnMaxValue(pkChosenFromCatalog[0]), from)
+
+        jdbcConnectionFactory.get().use { connection ->
+            val sql = selectQueryGenerator.generate(maxPkQuery).sql
+            // Close the statement and the result set explicitly rather than relying on the
+            // connection being closed.
+            connection.prepareStatement(sql).use { stmt ->
+                stmt.executeQuery().use { rs ->
+                    if (rs.next()) {
+                        val jdbcFieldType = pkChosenFromCatalog[0].type as JdbcFieldType<*>
+                        val pkUpperBound: JsonNode = jdbcFieldType.get(rs, 1)
+                        return pkUpperBound
+                    } else {
+                        // Table might be empty thus there is no max PK value.
+                        return Jsons.nullNode()
+                    }
+                }
+            }
+        }
+    }
+
+    /** Picks the partition for a stream that has no saved state. */
+    private fun coldStart(streamState: DefaultJdbcStreamState): MsSqlServerJdbcPartition {
+        val stream: Stream = streamState.stream
+        val pkChosenFromCatalog: List<Field> = stream.configuredPrimaryKey ?: listOf()
+
+        if (stream.configuredSyncMode == ConfiguredSyncMode.FULL_REFRESH) {
+            if (pkChosenFromCatalog.isEmpty()) {
+                return MsSqlServerJdbcNonResumableSnapshotPartition(
+                    selectQueryGenerator,
+                    streamState,
+                )
+            }
+
+            val upperBound = findPkUpperBound(stream, pkChosenFromCatalog)
+            if (sharedState.configuration.global) {
+                return MsSqlServerJdbcCdcRfrSnapshotPartition(
+                    selectQueryGenerator,
+                    streamState,
+                    pkChosenFromCatalog,
+                    lowerBound = null,
+                    upperBound = listOf(upperBound)
+                )
+            } else {
+                // The partition must be returned here. Without the `return` the object was built
+                // and dropped, and a full-refresh stream fell through to the incremental branches
+                // below (ending in "no cursor" or in a cursor-based partition).
+                return MsSqlServerJdbcRfrSnapshotPartition(
+                    selectQueryGenerator,
+                    streamState,
+                    pkChosenFromCatalog,
+                    lowerBound = null,
+                    upperBound = listOf(upperBound)
+                )
+            }
+        }
+
+        if (sharedState.configuration.global) {
+            return MsSqlServerJdbcCdcSnapshotPartition(
+                selectQueryGenerator,
+                streamState,
+                pkChosenFromCatalog,
+                lowerBound = null,
+            )
+        }
+
+        val cursorChosenFromCatalog: Field =
+            stream.configuredCursor as? Field ?: throw ConfigErrorException("no cursor")
+
+        if (pkChosenFromCatalog.isEmpty()) {
+            return MsSqlServerJdbcNonResumableSnapshotWithCursorPartition(
+                selectQueryGenerator,
+                streamState,
+                cursorChosenFromCatalog
+            )
+        }
+        return MsSqlServerJdbcSnapshotWithCursorPartition(
+            selectQueryGenerator,
+            streamState,
+            pkChosenFromCatalog,
+            lowerBound = null,
+            cursorChosenFromCatalog,
+            cursorUpperBound = null,
+        )
+    }
+
+    /**
+     * Flowchart:
+     * 1. If the input state is null - using coldstart.
+     * ```
+     *    a. If it's global but without PK, use non-resumable snapshot.
+     *    b. If it's global with PK, use snapshot.
+     *    c. If it's not global, use snapshot with cursor.
+     * ```
+     * 2. If the input state is not null -
+     * ```
+     *    a. If it's in global mode, JdbcPartitionFactory will not handle this. (TODO)
+     *    b. If it's cursor based, it could be either in PK read phase (initial read) or
+     *       cursor read phase (incremental read). This is differentiated by the stateType.
+     *      i. In PK read phase, use snapshot with cursor. If no PKs were found,
+     *         use non-resumable snapshot with cursor.
+     *      ii. In cursor read phase, use cursor incremental.
+     * ```
+     */
+    override fun create(streamFeedBootstrap: StreamFeedBootstrap): MsSqlServerJdbcPartition? {
+        val stream: Stream = streamFeedBootstrap.feed
+        val streamState: DefaultJdbcStreamState = streamState(streamFeedBootstrap)
+        val opaqueStateValue: OpaqueStateValue =
+            streamFeedBootstrap.currentState ?: return coldStart(streamState)
+
+        val isCursorBased: Boolean = !sharedState.configuration.global
+
+        val pkChosenFromCatalog: List<Field> = stream.configuredPrimaryKey ?: listOf()
+
+        if (
+            pkChosenFromCatalog.isEmpty() &&
+                stream.configuredSyncMode == ConfiguredSyncMode.FULL_REFRESH
+        ) {
+            if (
+                streamState.streamFeedBootstrap.currentState ==
+                    MsSqlServerJdbcStreamStateValue.snapshotCompleted
+            ) {
+                return null
+            }
+            return MsSqlServerJdbcNonResumableSnapshotPartition(
+                selectQueryGenerator,
+                streamState,
+            )
+        }
+
+        if (!isCursorBased) {
+            val sv: MsSqlServerCdcInitialSnapshotStateValue =
+                Jsons.treeToValue(
+                    opaqueStateValue,
+                    MsSqlServerCdcInitialSnapshotStateValue::class.java
+                )
+
+            if (stream.configuredSyncMode == ConfiguredSyncMode.FULL_REFRESH) {
+                val upperBound = findPkUpperBound(stream, pkChosenFromCatalog)
+                if (sv.pkVal == upperBound.asText()) {
+                    return null
+                }
+                val pkLowerBound: JsonNode = stateValueToJsonNode(pkChosenFromCatalog[0], sv.pkVal)
+
+                // NOTE(review): coldStart() uses MsSqlServerJdbcCdcRfrSnapshotPartition in global
+                // mode and MsSqlServerJdbcRfrSnapshotPartition otherwise; the two resume paths in
+                // this method use the opposite pairing. Confirm which one is intended.
+                return MsSqlServerJdbcRfrSnapshotPartition(
+                    selectQueryGenerator,
+                    streamState,
+                    pkChosenFromCatalog,
+                    lowerBound = if (pkLowerBound.isNull) null else listOf(pkLowerBound),
+                    upperBound = listOf(upperBound)
+                )
+            }
+
+            if (sv.pkName == null) {
+                // This indicates initial snapshot has been completed. CDC snapshot will be handled
+                // by CDCPartitionFactory.
+                // Nothing to do here.
+                return null
+            } else {
+                // This branch indicates snapshot is incomplete. We need to resume based on previous
+                // snapshot state.
+                val pkField = pkChosenFromCatalog.first()
+                val pkLowerBound: JsonNode = stateValueToJsonNode(pkField, sv.pkVal)
+
+                // NOTE(review): FULL_REFRESH always returns from the check further up, so this
+                // block cannot be reached.
+                if (stream.configuredSyncMode == ConfiguredSyncMode.FULL_REFRESH) {
+                    val upperBound = findPkUpperBound(stream, pkChosenFromCatalog)
+                    if (sv.pkVal == upperBound.asText()) {
+                        return null
+                    }
+                    return MsSqlServerJdbcCdcRfrSnapshotPartition(
+                        selectQueryGenerator,
+                        streamState,
+                        pkChosenFromCatalog,
+                        lowerBound = if (pkLowerBound.isNull) null else listOf(pkLowerBound),
+                        upperBound = listOf(upperBound)
+                    )
+                }
+                return MsSqlServerJdbcCdcSnapshotPartition(
+                    selectQueryGenerator,
+                    streamState,
+                    pkChosenFromCatalog,
+                    lowerBound = listOf(pkLowerBound),
+                )
+            }
+        } else {
+            val sv: MsSqlServerJdbcStreamStateValue =
+                Jsons.treeToValue(opaqueStateValue, MsSqlServerJdbcStreamStateValue::class.java)
+
+            if (stream.configuredSyncMode == ConfiguredSyncMode.FULL_REFRESH) {
+                val upperBound = findPkUpperBound(stream, pkChosenFromCatalog)
+                if (sv.pkValue == upperBound.asText()) {
+                    return null
+                }
+                val pkLowerBound: JsonNode =
+                    stateValueToJsonNode(pkChosenFromCatalog[0], sv.pkValue)
+
+                return MsSqlServerJdbcCdcRfrSnapshotPartition(
+                    selectQueryGenerator,
+                    streamState,
+                    pkChosenFromCatalog,
+                    lowerBound = if (pkLowerBound.isNull) null else listOf(pkLowerBound),
+                    upperBound = listOf(upperBound)
+                )
+            }
+
+            if (sv.stateType != "cursor_based") {
+                // Loading value from catalog. Note there could be unexpected behaviors if user
+                // updates their schema but did not reset their state.
+                val pkField = pkChosenFromCatalog.first()
+                val pkLowerBound: JsonNode = stateValueToJsonNode(pkField, sv.pkValue)
+
+                val cursorChosenFromCatalog: Field =
+                    stream.configuredCursor as? Field ?: throw ConfigErrorException("no cursor")
+
+                // in a state where it's still in primary_key read part.
+                return MsSqlServerJdbcSnapshotWithCursorPartition(
+                    selectQueryGenerator,
+                    streamState,
+                    pkChosenFromCatalog,
+                    lowerBound = listOf(pkLowerBound),
+                    cursorChosenFromCatalog,
+                    cursorUpperBound = null,
+                )
+            }
+            // resume back to cursor based increment.
+            val cursor: Field = stream.fields.find { it.id == sv.cursorField.first() } as Field
+            val cursorCheckpoint: JsonNode = stateValueToJsonNode(cursor, sv.cursors)
+
+            // Compose a jsonnode of cursor label to cursor value to fit in
+            // DefaultJdbcCursorIncrementalPartition
+            if (cursorCheckpoint.toString() == streamState.cursorUpperBound?.toString()) {
+                // Incremental complete.
+                return null
+            }
+            return MsSqlServerJdbcCursorIncrementalPartition(
+                selectQueryGenerator,
+                streamState,
+                cursor,
+                cursorLowerBound = cursorCheckpoint,
+                isLowerBoundIncluded = false,
+                cursorUpperBound = streamState.cursorUpperBound,
+            )
+        }
+    }
+
+    /**
+     * Converts a checkpoint value stored as text in the state back into a [JsonNode], typed
+     * according to the Airbyte schema type of [field]. Timestamps written as
+     * `yyyy-MM-dd'T'HH:mm:ss` are re-formatted with the CDK codec formatter; values that do not
+     * parse with that pattern are passed through unchanged.
+     *
+     * @throws IllegalStateException if the schema type of [field] is not a leaf type.
+     */
+    private fun stateValueToJsonNode(field: Field, stateValue: String?): JsonNode {
+        when (field.type.airbyteSchemaType) {
+            is LeafAirbyteSchemaType ->
+                return when (field.type.airbyteSchemaType as LeafAirbyteSchemaType) {
+                    LeafAirbyteSchemaType.INTEGER -> {
+                        Jsons.valueToTree(stateValue?.toBigInteger())
+                    }
+                    LeafAirbyteSchemaType.NUMBER -> {
+                        Jsons.valueToTree(stateValue?.toDouble())
+                    }
+                    LeafAirbyteSchemaType.BINARY -> {
+                        val ba = Base64.getDecoder().decode(stateValue!!)
+                        Jsons.valueToTree(ba)
+                    }
+                    LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE -> {
+                        val timestampInStatePattern = "yyyy-MM-dd'T'HH:mm:ss"
+                        try {
+                            val formatter: DateTimeFormatter =
+                                DateTimeFormatter.ofPattern(timestampInStatePattern)
+                            Jsons.textNode(
+                                LocalDateTime.parse(stateValue, formatter)
+                                    .format(LocalDateTimeCodec.formatter)
+                            )
+                        } catch (e: DateTimeParseException) {
+                            // Resolve to use the new format.
+                            Jsons.valueToTree(stateValue)
+                        }
+                    }
+                    LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE -> {
+                        val timestampInStatePattern = "yyyy-MM-dd'T'HH:mm:ss"
+                        try {
+                            val formatter: DateTimeFormatter =
+                                DateTimeFormatter.ofPattern(timestampInStatePattern)
+                            // NOTE(review): the saved value is moved back by one day before it
+                            // is re-formatted as UTC; the reason is not visible in this file.
+                            Jsons.valueToTree(
+                                LocalDateTime.parse(stateValue, formatter)
+                                    .minusDays(1)
+                                    .atOffset(java.time.ZoneOffset.UTC)
+                                    .format(OffsetDateTimeCodec.formatter)
+                            )
+                        } catch (e: DateTimeParseException) {
+                            // Resolve to use the new format.
+                            Jsons.valueToTree(stateValue)
+                        }
+                    }
+                    else -> Jsons.valueToTree(stateValue)
+                }
+            else ->
+                throw IllegalStateException(
+                    "PK field must be leaf type but is ${field.type.airbyteSchemaType}."
+                )
+        }
+    }
+
+    /** Returns [unsplitPartition] unchanged as a single-element list. */
+    override fun split(
+        unsplitPartition: MsSqlServerJdbcPartition,
+        opaqueStateValues: List<OpaqueStateValue>
+    ): List<MsSqlServerJdbcPartition> {
+        // At this moment we don't support splitting within a SQL Server stream in any mode.
+        return listOf(unsplitPartition)
+    }
+}
diff --git a/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerJdbcStreamStateValue.kt b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerJdbcStreamStateValue.kt
new file mode 100644
index 000000000000..48a4a9535d76
--- /dev/null
+++ b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerJdbcStreamStateValue.kt
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.source.mssql
+
+import com.fasterxml.jackson.annotation.JsonProperty
+import com.fasterxml.jackson.databind.JsonNode
+import io.airbyte.cdk.command.OpaqueStateValue
+import io.airbyte.cdk.discover.Field
+import io.airbyte.cdk.read.Stream
+import io.airbyte.cdk.util.Jsons
+
+/**
+ * Per-stream state as it is serialized to JSON. The [JsonProperty] names are the keys of the
+ * serialized form; every property has a default so that a partial document can be built.
+ */
+data class MsSqlServerJdbcStreamStateValue(
+    @JsonProperty("cursor") val cursors: String = "",
+    @JsonProperty("version") val version: Int = 2,
+    @JsonProperty("state_type") val stateType: String = StateType.CURSOR_BASED.stateType,
+    @JsonProperty("stream_name") val streamName: String = "",
+    @JsonProperty("cursor_field") val cursorField: List<String> = listOf(),
+    @JsonProperty("stream_namespace") val streamNamespace: String = "",
+    @JsonProperty("cursor_record_count") val cursorRecordCount: Int = 0,
+    @JsonProperty("pk_name") val pkName: String? = null,
+    @JsonProperty("pk_val") val pkValue: String? = null,
+    @JsonProperty("incremental_state") val incrementalState: JsonNode? = null,
+) {
+    companion object {
+        /** Value representing the completion of a FULL_REFRESH snapshot. */
+        val snapshotCompleted: OpaqueStateValue
+            get() =
+                Jsons.valueToTree(
+                    MsSqlServerJdbcStreamStateValue(stateType = StateType.PRIMARY_KEY.stateType)
+                )
+
+        /** Value representing the progress of an ongoing incremental cursor read. */
+        fun cursorIncrementalCheckpoint(
+            cursor: Field,
+            cursorCheckpoint: JsonNode,
+            stream: Stream,
+        ): OpaqueStateValue {
+            return Jsons.valueToTree(
+                MsSqlServerJdbcStreamStateValue(
+                    cursorField = listOf(cursor.id),
+                    cursors = cursorCheckpoint.asText(),
+                    streamName = stream.name,
+                    // A stream without a namespace keeps the field's default instead of
+                    // failing the checkpoint with a NullPointerException.
+                    streamNamespace = stream.namespace ?: ""
+                )
+            )
+        }
+
+        /** Value representing the progress of an ongoing snapshot not involving cursor columns. */
+        fun snapshotCheckpoint(
+            primaryKey: List<Field>,
+            primaryKeyCheckpoint: List<JsonNode>,
+        ): OpaqueStateValue {
+            // Only the first primary-key column and its checkpoint value are recorded.
+            val primaryKeyField = primaryKey.first()
+            return Jsons.valueToTree(
+                MsSqlServerJdbcStreamStateValue(
+                    pkName = primaryKeyField.id,
+                    pkValue = primaryKeyCheckpoint.first().asText(),
+                    stateType = StateType.PRIMARY_KEY.stateType,
+                )
+            )
+        }
+
+        /** Value representing the progress of an ongoing snapshot involving cursor columns. */
+        fun snapshotWithCursorCheckpoint(
+            primaryKey: List<Field>,
+            primaryKeyCheckpoint: List<JsonNode>,
+            cursor: Field,
+            stream: Stream
+        ): OpaqueStateValue {
+            val primaryKeyField = primaryKey.first()
+            return Jsons.valueToTree(
+                MsSqlServerJdbcStreamStateValue(
+                    pkName = primaryKeyField.id,
+                    pkValue = primaryKeyCheckpoint.first().asText(),
+                    stateType = StateType.PRIMARY_KEY.stateType,
+                    // Nested cursor-based state naming the cursor column and the stream.
+                    incrementalState =
+                        Jsons.valueToTree(
+                            MsSqlServerJdbcStreamStateValue(
+                                cursorField = listOf(cursor.id),
+                                streamName = stream.name,
+                                streamNamespace = stream.namespace ?: ""
+                            )
+                        ),
+                )
+            )
+        }
+    }
+}
+
+/** The values written to the `state_type` property of [MsSqlServerJdbcStreamStateValue]. */
+enum class StateType(val stateType: String) {
+    PRIMARY_KEY("primary_key"),
+    CURSOR_BASED("cursor_based"),
+}
diff --git a/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSelectQueryGenerator.kt b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSelectQueryGenerator.kt
new file mode 100644
index 000000000000..87f141cdcde3
--- /dev/null
+++ b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSelectQueryGenerator.kt
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.source.mssql
+
+import io.airbyte.cdk.discover.Field
+import io.airbyte.cdk.jdbc.LosslessJdbcFieldType
+import io.airbyte.cdk.read.*
+import io.micronaut.context.annotation.Primary
+import jakarta.inject.Singleton
+
+/**
+ * Renders a [SelectQuerySpec] as SQL text plus the list of values to bind to its `?`
+ * placeholders. Row limits are rendered as a `TOP n` prefix of the select list.
+ */
+@Singleton
+@Primary
+class MsSqlServerSelectQueryGenerator : SelectQueryGenerator {
+    override fun generate(ast: SelectQuerySpec): SelectQuery =
+        SelectQuery(ast.sql(), ast.select.columns, ast.bindings())
+
+    /** Joins the non-blank SELECT, FROM, WHERE and ORDER BY fragments with single spaces. */
+    fun SelectQuerySpec.sql(): String {
+        val components: List =
+            listOf(sql(select, limit), from.sql(), where.sql(), orderBy.sql())
+        val sql: String = components.filter { it.isNotBlank() }.joinToString(" ")
+        return sql
+    }
+
+    /** Renders `SELECT [TOP n ]` followed by the column list or a `MAX(column)` expression. */
+    fun sql(selectNode: SelectNode, limit: LimitNode): String {
+        val topClause: String =
+            when (limit) {
+                NoLimit -> ""
+                // Yields the same text as the generic `is Limit` branch below.
+                Limit(0) -> "TOP 0 "
+                is Limit -> "TOP ${limit.n} "
+            }
+        return "SELECT $topClause" +
+            when (selectNode) {
+                is SelectColumns -> selectNode.columns.joinToString(", ") { it.sql() }
+                is SelectColumnMaxValue -> "MAX(${selectNode.column.sql()})"
+            }
+    }
+
+    // The column id is emitted as-is, without any identifier quoting.
+    fun Field.sql(): String = "$id"
+
+    /** Renders the FROM clause; a sampled source also gets its own WHERE clause appended. */
+    fun FromNode.sql(): String =
+        when (this) {
+            NoFrom -> ""
+            is From -> if (this.namespace == null) "FROM $name" else "FROM $namespace.$name"
+            is FromSample -> {
+                val from: String = From(name, namespace).sql()
+                // On a table that is very big we limit sampling to no less than 0.05%
+                // chance of a row getting picked. This comes at a price of bias to the beginning
+                // of table on very large tables ( > 100s million of rows)
+                // NOTE(review): 0.00005 is 0.005%, not the 0.05% stated above - confirm which
+                // one is intended.
+                val greatestRate: String = 0.00005.toString()
+                // Quick approximation to "select count(*) from table" which doesn't require
+                // full table scan.
+                // NOTE(review): this sampling query looks like it was carried over from MySQL.
+                // SQL Server's INFORMATION_SCHEMA.TABLES has no table_rows column, GREATEST
+                // only exists in recent SQL Server versions, and RAND() without a seed is
+                // evaluated once per query rather than once per row - verify against a real
+                // server before relying on it.
+                val quickCount =
+                    "SELECT table_rows FROM information_schema.tables WHERE table_schema = '$namespace' AND table_name = '$name'"
+                val greatest = "GREATEST($greatestRate, $sampleSize / ($quickCount))"
+                // Rand returns a value between 0 and 1
+                val where = "WHERE RAND() < $greatest "
+                "$from $where"
+            }
+        }
+
+    fun WhereNode.sql(): String =
+        when (this) {
+            NoWhere -> ""
+            is Where -> "WHERE ${clause.sql()}"
+        }
+
+    /** Renders a predicate tree; every leaf comparison uses a `?` placeholder for its value. */
+    fun WhereClauseNode.sql(): String =
+        when (this) {
+            // Each operand is wrapped in parentheses.
+            is And -> conj.joinToString(") AND (", "(", ")") { it.sql() }
+            is Or -> disj.joinToString(") OR (", "(", ")") { it.sql() }
+            is Equal -> "${column.sql()} = ?"
+            is Greater -> "${column.sql()} > ?"
+            is GreaterOrEqual -> "${column.sql()} >= ?"
+            is LesserOrEqual -> "${column.sql()} <= ?"
+            is Lesser -> "${column.sql()} < ?"
+        }
+
+    fun OrderByNode.sql(): String =
+        when (this) {
+            NoOrderBy -> ""
+            is OrderBy -> "ORDER BY " + columns.joinToString(", ") { it.sql() }
+        }
+
+    /** Collects the bind values of the WHERE clause; the limit contributes none. */
+    fun SelectQuerySpec.bindings(): List = where.bindings() + limit.bindings()
+
+    fun WhereNode.bindings(): List =
+        when (this) {
+            is NoWhere -> listOf()
+            is Where -> clause.bindings()
+        }
+
+    fun WhereClauseNode.bindings(): List =
+        when (this) {
+            is And -> conj.flatMap { it.bindings() }
+            is Or -> disj.flatMap { it.bindings() }
+            is WhereClauseLeafNode -> {
+                val type = column.type as LosslessJdbcFieldType<*, *>
+                listOf(SelectQuery.Binding(bindingValue, type))
+            }
+        }
+
+    // The limit is inlined into the SQL text as TOP n, so it never produces a binding.
+    fun LimitNode.bindings(): List =
+        when (this) {
+            NoLimit,
+            Limit(0),
+            is Limit -> emptyList()
+        }
+}
diff --git a/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSource.kt b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSource.kt
new file mode 100644
index 000000000000..fc7250cfc09d
--- /dev/null
+++ b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSource.kt
@@ -0,0
+1,18 @@
+/*
+ * Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.source.mssql
+
+import io.airbyte.cdk.AirbyteSourceRunner
+import io.github.oshai.kotlinlogging.KotlinLogging
+
+/** Entry point of the connector: logs the command-line arguments and runs the source. */
+object MsSqlServerSource {
+    private val log = KotlinLogging.logger {}
+
+    @JvmStatic
+    fun main(args: Array) {
+        // NOTE(review): the "SGX" prefix looks like a leftover debugging marker - confirm this
+        // log line is meant to ship.
+        log.info { "SGX parameters = ${args.toList()}" }
+        AirbyteSourceRunner.run(*args)
+    }
+}
diff --git a/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceConfiguration.kt b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceConfiguration.kt
new file mode 100644
index 000000000000..730646195447
--- /dev/null
+++ b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceConfiguration.kt
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.source.mssql
+
+import io.airbyte.cdk.command.*
+import io.airbyte.cdk.ssh.SshConnectionOptions
+import io.airbyte.cdk.ssh.SshNoTunnelMethod
+import io.airbyte.cdk.ssh.SshTunnelMethodConfiguration
+import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerCdcReplicationConfigurationSpecification
+import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerCursorBasedReplicationConfigurationSpecification
+import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerSourceConfigurationSpecification
+import io.micronaut.context.annotation.Factory
+import jakarta.inject.Inject
+import jakarta.inject.Singleton
+import java.time.Duration
+
+/** The replication method chosen in the connector configuration. */
+sealed interface MsSqlServerIncrementalReplicationConfiguration
+
+/** Replication driven by a user-defined cursor column. */
+data object MsSqlServerCursorBasedIncrementalReplicationConfiguration :
+    MsSqlServerIncrementalReplicationConfiguration
+
+/** Replication through change data capture, carrying the configured initial waiting time. */
+data class MsSqlServerCdcIncrementalReplicationConfiguration(var initialWaitingSeconds: Int) :
+    MsSqlServerIncrementalReplicationConfiguration
+
+/** Validated connector configuration, exposed through the JDBC and CDC configuration interfaces. */
+class MsSqlServerSourceConfiguration(
+    override val realHost: String,
+    override val realPort: Int,
+    override val sshTunnel: SshTunnelMethodConfiguration?,
+    override val sshConnectionOptions: SshConnectionOptions,
+    override val global: Boolean,
+    override val maxSnapshotReadDuration: Duration?,
+    override val checkpointTargetInterval: Duration,
+    override val maxConcurrency: Int,
+    override val resourceAcquisitionHeartbeat: Duration,
+    override val debeziumHeartbeatInterval: Duration,
+    override val jdbcUrlFmt: String,
+    override val jdbcProperties: Map,
+    override val namespaces: Set,
+    val incrementalReplicationConfiguration: MsSqlServerIncrementalReplicationConfiguration,
+) : JdbcSourceConfiguration, CdcSourceConfiguration {}
+
+/**
+ * Builds a [MsSqlServerSourceConfiguration] from the deserialized
+ * [MsSqlServerSourceConfigurationSpecification].
+ */
+@Singleton
+class MsSqlServerSourceConfigurationFactory
+@Inject
+constructor(val featureFlags: Set) :
+    SourceConfigurationFactory<
+        MsSqlServerSourceConfigurationSpecification, MsSqlServerSourceConfiguration> {
+
+    constructor() : this(emptySet())
+
+    override fun makeWithoutExceptionHandling(
+        pojo: MsSqlServerSourceConfigurationSpecification,
+    ): MsSqlServerSourceConfiguration {
+        val replicationMethodPojo = pojo.replicationMethodJson
+        val incrementalReplicationConfiguration =
+            when (replicationMethodPojo) {
+                is MsSqlServerCdcReplicationConfigurationSpecification ->
+                    MsSqlServerCdcIncrementalReplicationConfiguration(
+                        initialWaitingSeconds = replicationMethodPojo.initialWaitingSeconds
+                                ?: MsSqlServerCdcReplicationConfigurationSpecification
+                                    .DEFAULT_INITIAL_WAITING_SECONDS
+                    )
+                is MsSqlServerCursorBasedReplicationConfigurationSpecification ->
+                    MsSqlServerCursorBasedIncrementalReplicationConfiguration
+                // NOTE(review): a configuration without a replication method ends up in
+                // Kotlin's TODO(), which throws NotImplementedError.
+                null -> TODO()
+            }
+        // NOTE(review): the SSH tunnel is always SshNoTunnelMethod and "encrypt" is always
+        // "false" below, whatever the specification says - presumably not wired up yet; confirm.
+        return MsSqlServerSourceConfiguration(
+            realHost = pojo.host,
+            realPort = pojo.port,
+            sshTunnel = SshNoTunnelMethod,
+            sshConnectionOptions = SshConnectionOptions.fromAdditionalProperties(emptyMap()),
+            // Global mode is on exactly when CDC replication was selected.
+            global =
+                incrementalReplicationConfiguration
+                    is MsSqlServerCdcIncrementalReplicationConfiguration,
+            maxSnapshotReadDuration = null,
+            checkpointTargetInterval = Duration.ofHours(1),
+            // %s and %d are left for the host and port; the database name is filled in here.
+            jdbcUrlFmt = "jdbc:sqlserver://%s:%d;databaseName=${pojo.database}",
+            namespaces = pojo.schemas?.toSet() ?: setOf(),
+            jdbcProperties =
+                mapOf("encrypt" to "false", "user" to pojo.username, "password" to pojo.password),
+            maxConcurrency = 10,
+            debeziumHeartbeatInterval = Duration.ofSeconds(15),
+            resourceAcquisitionHeartbeat = Duration.ofSeconds(15),
+            incrementalReplicationConfiguration = incrementalReplicationConfiguration
+        )
+    }
+
+    /** Required to inject [MsSqlServerSourceConfiguration] directly. */
+    @Factory
+    private class MicronautFactory {
+        @Singleton
+        fun mysqlSourceConfig(
+            factory:
+                SourceConfigurationFactory<
+                    MsSqlServerSourceConfigurationSpecification, MsSqlServerSourceConfiguration>,
+            supplier:
+                ConfigurationSpecificationSupplier,
+        ): MsSqlServerSourceConfiguration = factory.make(supplier.get())
+    }
+}
diff --git a/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerStreamFactory.kt b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerStreamFactory.kt
new file mode 100644
index 000000000000..98a8abf8357c
--- /dev/null
+++ b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerStreamFactory.kt
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.source.mssql
+
+import com.fasterxml.jackson.databind.JsonNode
+import com.fasterxml.jackson.databind.node.ObjectNode
+import io.airbyte.cdk.command.OpaqueStateValue
+import io.airbyte.cdk.discover.*
+import io.airbyte.cdk.read.Stream
+import io.micronaut.context.annotation.Primary
+import jakarta.inject.Singleton
+import java.time.OffsetDateTime
+
+/** Declares the CDC meta-fields of the connector and decorates records with them. */
+@Singleton
+@Primary
+class MsSqlServerStreamFactory : JdbcAirbyteStreamFactory {
+    override val globalCursor: MetaField? = null
+    override val globalMetaFields: Set =
+        setOf(
+            CommonMetaField.CDC_UPDATED_AT,
+            CommonMetaField.CDC_DELETED_AT,
+            MsSqlServerCdcMetaFields.CDC_CURSOR,
+            MsSqlServerCdcMetaFields.CDC_EVENT_SERIAL_NO,
+            MsSqlServerCdcMetaFields.CDC_LSN,
+        )
+
+    // Only the CDC_UPDATED_AT field is set on the record; the rest of the body is commented out.
+    // NOTE(review): the commented-out block refers to MySQL types (MysqlCdcMetaFields,
+    // MySqlDebeziumOperations) and looks like a placeholder for the SQL Server equivalent.
+    override fun decorateRecordData(
+        timestamp: OffsetDateTime,
+        globalStateValue: OpaqueStateValue?,
+        stream: Stream,
+        recordData: ObjectNode
+    ) {
+        recordData.set(
+            CommonMetaField.CDC_UPDATED_AT.id,
+            CdcOffsetDateTimeMetaFieldType.jsonEncoder.encode(timestamp),
+        ) /*
+          recordData.set(
+              MsSqlServerCdcMetaFields.CDC_EVENT_SERIAL_NO.id,
+              CdcIntegerMetaFieldType.jsonEncoder.encode(1),
+          )
+          recordData.set(
+              MysqlCdcMetaFields.CDC_LOG_POS.id,
+              CdcIntegerMetaFieldType.jsonEncoder.encode(0),
+          )
+          if (globalStateValue == null) {
+              return
+          }
+          val debeziumState: DebeziumState =
+              MySqlDebeziumOperations.deserializeDebeziumState(globalStateValue)
+          val position: MySqlPosition = MySqlDebeziumOperations.position(debeziumState.offset)
+          recordData.set(
+              MysqlCdcMetaFields.CDC_LOG_FILE.id,
+              CdcStringMetaFieldType.jsonEncoder.encode(position.fileName),
+          )
+          recordData.set(
+              MysqlCdcMetaFields.CDC_LOG_POS.id,
+              CdcIntegerMetaFieldType.jsonEncoder.encode(position.position),
+          )*/
+    }
+
+    /**
+     * Connector-specific CDC meta-fields. The field id is [MetaField.META_PREFIX] followed by
+     * the lower-cased constant name.
+     */
+    // NOTE(review): CDC_LSN is declared as an integer and CDC_EVENT_SERIAL_NO as a string -
+    // confirm these types are the intended way round.
+    enum class MsSqlServerCdcMetaFields(
+        override val type: FieldType,
+    ) : MetaField {
+        CDC_CURSOR(CdcIntegerMetaFieldType),
+        CDC_LSN(CdcIntegerMetaFieldType),
+        CDC_EVENT_SERIAL_NO(CdcStringMetaFieldType),
+        ;
+
+        override val id: String
+            get() = MetaField.META_PREFIX + name.lowercase()
+    }
+}
diff --git a/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/config_spec/MsSqlServerEncryptionConfigurationSpecification.kt b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/config_spec/MsSqlServerEncryptionConfigurationSpecification.kt
new file mode 100644
index 000000000000..6b9b4b4f3994
--- /dev/null
+++ b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/config_spec/MsSqlServerEncryptionConfigurationSpecification.kt
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.source.mssql.config_spec
+
+import com.fasterxml.jackson.annotation.*
+import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaDescription
+import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaInject
+import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
+import edu.umd.cs.findbugs.annotations.SuppressFBWarnings
+import io.airbyte.cdk.ConfigErrorException
+
+// Polymorphic JSON type: the "ssl_method" property selects one of the subtypes listed below.
+@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "ssl_method")
+@JsonSubTypes(
+    JsonSubTypes.Type(
+        value = MsSqlServerEncryptionDisabledConfigurationSpecification::class,
+        name = "unencrypted"
+    ),
+    JsonSubTypes.Type(
+        value =
+            MsSqlServerEncryptionRequiredTrustServerCertificateConfigurationSpecification::class,
+        name = "encrypted_trust_server_certificate"
+    ),
+    JsonSubTypes.Type(value = SslVerifyCertificate::class, name = "encrypted_verify_certificate"),
+)
+@JsonSchemaTitle("Encryption")
+@JsonSchemaDescription("The encryption method which is used when communicating with the database.")
+sealed interface MsSqlServerEncryptionConfigurationSpecification
+
+@JsonSchemaTitle("Unencrypted")
+@JsonSchemaDescription(
+    "Data transfer will not be encrypted.",
+)
+data object MsSqlServerEncryptionDisabledConfigurationSpecification :
+    MsSqlServerEncryptionConfigurationSpecification
+
+@JsonSchemaTitle("Encrypted (trust server certificate)")
+@JsonSchemaDescription(
+    "Use the certificate provided by the server without verification. (For testing purposes only!)"
+)
+data object MsSqlServerEncryptionRequiredTrustServerCertificateConfigurationSpecification :
+    MsSqlServerEncryptionConfigurationSpecification
+
+@JsonSchemaTitle("Encrypted (verify certificate)")
+@JsonSchemaDescription("Verify and use the certificate provided by the server.")
+@SuppressFBWarnings(value = ["NP_NONNULL_RETURN_VIOLATION"], justification = "Micronaut DI")
+class SslVerifyCertificate : MsSqlServerEncryptionConfigurationSpecification {
+    @JsonProperty("hostNameInCertificate")
+    @JsonSchemaTitle("Host Name In Certificate")
+    @JsonPropertyDescription(
+        "Specifies the host name of the server. The value of this property must match the subject property of the certificate.",
+    )
+    @JsonSchemaInject(json = """{"order":0}""")
+    var hostNameInCertificate: String? = null
+
+    @JsonProperty("certificate", required = false)
+    @JsonSchemaTitle("Certificate")
+    @JsonPropertyDescription(
+        "certificate of the server, or of the CA that signed the server certificate",
+    )
+    @JsonSchemaInject(json = """{"order":1,"airbyte_secret":true,"multiline":true}""")
+    var certificate: String? = null
+}
+
+/**
+ * Flat holder with a `mode` and an optional `certificate`, converted into a
+ * [MsSqlServerEncryptionConfigurationSpecification] by [asEncryption].
+ */
+class MicronautPropertiesFriendlyMsSqlServerEncryption {
+    var mode: String = "preferred"
+    var certificate: String? = null
+
+    // "verify_ca" requires `certificate` to be set (non-null assertion); any mode other than
+    // the three listed is rejected with a ConfigErrorException.
+    @JsonValue
+    fun asEncryption(): MsSqlServerEncryptionConfigurationSpecification =
+        when (mode) {
+            "preferred" -> MsSqlServerEncryptionDisabledConfigurationSpecification
+            "required" ->
+                MsSqlServerEncryptionRequiredTrustServerCertificateConfigurationSpecification
+            "verify_ca" -> SslVerifyCertificate().also { it.certificate = certificate!! }
+            else -> throw ConfigErrorException("invalid value $mode")
+        }
+}
diff --git a/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/config_spec/MsSqlServerReplicationMethodConfigurationSpecification.kt b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/config_spec/MsSqlServerReplicationMethodConfigurationSpecification.kt
new file mode 100644
index 000000000000..0618622eb915
--- /dev/null
+++ b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/config_spec/MsSqlServerReplicationMethodConfigurationSpecification.kt
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.source.mssql.config_spec
+
+import com.fasterxml.jackson.annotation.*
+import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaDescription
+import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaInject
+import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
+import edu.umd.cs.findbugs.annotations.SuppressFBWarnings
+import io.airbyte.cdk.ConfigErrorException
+
+// Polymorphic JSON type: the "method" property selects CDC or cursor-based ("STANDARD").
+@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "method")
+@JsonSubTypes(
+    JsonSubTypes.Type(
+        value = MsSqlServerCdcReplicationConfigurationSpecification::class,
+        name = "CDC"
+    ),
+    JsonSubTypes.Type(
+        value = MsSqlServerCursorBasedReplicationConfigurationSpecification::class,
+        name = "STANDARD"
+    ),
+)
+sealed interface MsSqlServerReplicationMethodConfigurationSpecification
+
+@JsonSchemaTitle("Read Changes using Change Data Capture (CDC)")
+@JsonSchemaDescription(
+    "Recommended - Incrementally reads new inserts, updates, and deletes using the SQL Server's " +
+        "" +
+        "change data capture feature. This must be enabled on your database."
+)
+@SuppressFBWarnings(value = ["NP_NONNULL_RETURN_VIOLATION"], justification = "Micronaut DI")
+class MsSqlServerCdcReplicationConfigurationSpecification :
+    MsSqlServerReplicationMethodConfigurationSpecification {
+    @JsonProperty("initial_waiting_seconds")
+    @JsonSchemaTitle("Initial Waiting Time in Seconds (Advanced)")
+    @JsonPropertyDescription(
+        "The amount of time the connector will wait when it launches to determine if there is new data to sync or not. " +
+            "Defaults to 300 seconds. Valid range: 120 seconds to 3600 seconds.",
+    )
+    @JsonSchemaInject(json = """{"order":1, "min":120, "max":3600, "default":300}""")
+    var initialWaitingSeconds: Int? = DEFAULT_INITIAL_WAITING_SECONDS
+
+    @JsonProperty("invalid_cdc_cursor_position_behavior")
+    @JsonSchemaTitle("Invalid CDC position behavior (Advanced)")
+    @JsonPropertyDescription(
+        "Determines whether Airbyte should fail or re-sync data in case of an stale/invalid cursor value into the WAL. " +
+            "If 'Fail sync' is chosen, a user will have to manually reset the connection before being able to continue syncing data. " +
+            "If 'Re-sync data' is chosen, Airbyte will automatically trigger a refresh but could lead to higher cloud costs and data loss.",
+    )
+    @JsonSchemaInject(
+        json = """{"order":2,"enum": ["Fail sync", "Re-sync data"], "default": "Fail sync"}"""
+    )
+    var invalidCdcCursorPositionBehavior: String? = "Fail sync"
+
+    @JsonProperty("queue_size")
+    @JsonSchemaTitle("Size of the queue (Advanced)")
+    @JsonPropertyDescription(
+        "The size of the internal queue. This may interfere with memory consumption and efficiency of the connector, please be careful.",
+    )
+    @JsonSchemaInject(json = """{"order":3, "min":1000, "max":10000, "default": 10000}""")
+    var queueSize: Int? = 10000
+
+    @JsonProperty("initial_load_timeout_hours")
+    @JsonSchemaTitle("Initial Load Timeout in Hours (Advanced)")
+    @JsonPropertyDescription(
+        "The amount of time an initial load is allowed to continue for before catching up on CDC logs.",
+    )
+    @JsonSchemaInject(json = """{"order":4, "min":4, "max":24, "default": 8}""")
+    var initialLoadTimeoutHours: Int? = 8
+
+    companion object {
+        // Used as the initial value of initialWaitingSeconds above.
+        const val DEFAULT_INITIAL_WAITING_SECONDS = 300
+    }
+}
+
+@JsonSchemaTitle("Scan Changes with User Defined Cursor")
+@JsonSchemaDescription(
+    "Incrementally detects new inserts and updates using the " +
+        "" +
+        "cursor column chosen when configuring a connection (e.g. created_at, updated_at)."
+)
+class MsSqlServerCursorBasedReplicationConfigurationSpecification :
+    MsSqlServerReplicationMethodConfigurationSpecification {}
+
+/**
+ * Flat holder with a `method` name, converted into a
+ * [MsSqlServerReplicationMethodConfigurationSpecification] by [asReplicationMethod].
+ */
+class MsSqlServerMicronautPropertiesFriendlyMsSqlServerReplicationMethodConfiguration {
+    val method: String = "CDC"
+    // Any method other than "CDC" or "STANDARD" is rejected with a ConfigErrorException.
+    @JsonValue
+    fun asReplicationMethod(): MsSqlServerReplicationMethodConfigurationSpecification =
+        when (method) {
+            "CDC" -> MsSqlServerCdcReplicationConfigurationSpecification()
+            "STANDARD" -> MsSqlServerCursorBasedReplicationConfigurationSpecification()
+            else -> throw ConfigErrorException("invalid value $method")
+        }
+}
diff --git a/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/config_spec/MsSqlServerSourceConfigurationSpecification.kt b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/config_spec/MsSqlServerSourceConfigurationSpecification.kt
new file mode 100644
index 000000000000..6f1abd046beb
--- /dev/null
+++ b/airbyte-integrations/connectors/source-mssql/src/main/kotlin/io/airbyte/integrations/source/mssql/config_spec/MsSqlServerSourceConfigurationSpecification.kt
@@ -0,0 +1,120 @@
+/* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
*/
+package io.airbyte.integrations.source.mssql.config_spec
+
+import com.fasterxml.jackson.annotation.JsonGetter
+import com.fasterxml.jackson.annotation.JsonIgnore
+import com.fasterxml.jackson.annotation.JsonProperty
+import com.fasterxml.jackson.annotation.JsonPropertyDescription
+import com.fasterxml.jackson.annotation.JsonPropertyOrder
+import com.fasterxml.jackson.annotation.JsonSetter
+import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaDefault
+import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaInject
+import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
+import edu.umd.cs.findbugs.annotations.SuppressFBWarnings
+import io.airbyte.cdk.command.CONNECTOR_CONFIG_PREFIX
+import io.airbyte.cdk.command.ConfigurationSpecification
+import io.micronaut.context.annotation.ConfigurationBuilder
+import io.micronaut.context.annotation.ConfigurationProperties
+import jakarta.inject.Singleton
+
+/**
+ * The object which is mapped to the MS SQL Server source configuration JSON.
+ *
+ * Use [MsSqlServerSourceConfiguration] instead wherever possible. Values may also be injected
+ * through Micronaut properties via the `MicronautPropertiesFriendly` classes; for ssl_method
+ * and replication_method a value parsed from JSON takes precedence over the injected one.
+ */
+@JsonSchemaTitle("MSSQL Source Spec")
+@JsonPropertyOrder(
+    value = ["host", "port", "database", "schemas", "username", "password"],
+)
+@Singleton
+@ConfigurationProperties(CONNECTOR_CONFIG_PREFIX)
+@SuppressFBWarnings(value = ["NP_NONNULL_RETURN_VIOLATION"], justification = "Micronaut DI")
+class MsSqlServerSourceConfigurationSpecification : ConfigurationSpecification() {
+    @JsonProperty("host")
+    @JsonSchemaTitle("Host")
+    @JsonSchemaInject(json = """{"order":0}""")
+    @JsonPropertyDescription("The hostname of the database.")
+    lateinit var host: String
+
+    @JsonProperty("port")
+    @JsonSchemaTitle("Port")
+    @JsonSchemaInject(json = """{"order":1,"minimum": 0,"maximum": 65536, "examples":["1433"]}""")
+    @JsonSchemaDefault("1433")
+    @JsonPropertyDescription(
+        "The port of the database.",
+    )
+    var port: Int = 1433
+
+    @JsonProperty("database")
+    @JsonSchemaTitle("Database")
+    @JsonPropertyDescription("The name of the database.")
+    @JsonSchemaInject(json = """{"order":2, "examples":["master"]}""")
+    lateinit var database: String
+
+    @JsonProperty("schemas")
+    @JsonSchemaTitle("Schemas")
+    @JsonPropertyDescription("The list of schemas to sync from. Defaults to user. Case sensitive.")
+    // @JsonSchemaDefault doesn't seem to work for array types, so the default is injected below.
+    @JsonSchemaInject(json = """{"order":3, "default":["dbo"], "minItems":0, "uniqueItems":true}""")
+    var schemas: Array<String>? = arrayOf("dbo")
+
+    @JsonProperty("username")
+    @JsonSchemaTitle("Username")
+    @JsonPropertyDescription("The username which is used to access the database.")
+    @JsonSchemaInject(json = """{"order":4}""")
+    lateinit var username: String
+
+    @JsonProperty("password")
+    @JsonSchemaTitle("Password")
+    @JsonPropertyDescription("The password associated with the username.")
+    @JsonSchemaInject(json = """{"order":5,"airbyte_secret":true}""")
+    lateinit var password: String
+
+    @JsonProperty("jdbc_url_params")
+    @JsonSchemaTitle("JDBC URL Params")
+    @JsonPropertyDescription(
+        "Additional properties to pass to the JDBC URL string when connecting to the database " +
+            "formatted as 'key=value' pairs separated by the symbol '&'. " +
+            "(example: key1=value1&key2=value2&key3=value3).",
+    )
+    @JsonSchemaInject(json = """{"order":6}""")
+    var jdbcUrlParams: String? = null
+
+    @JsonIgnore
+    @ConfigurationBuilder(configurationPrefix = "ssl_method")
+    var encryption = MicronautPropertiesFriendlyMsSqlServerEncryption()
+    @JsonIgnore var encryptionJson: MsSqlServerEncryptionConfigurationSpecification? = null
+    @JsonSetter("ssl_method")
+    fun setEncryptionValue(value: MsSqlServerEncryptionConfigurationSpecification) {
+        encryptionJson = value
+    }
+    @JsonGetter("ssl_method")
+    @JsonSchemaTitle("SSL Method")
+    @JsonPropertyDescription(
+        "The encryption method which is used when communicating with the database.",
+    )
+    @JsonSchemaInject(json = """{"order":7}""")
+    fun getEncryptionValue(): MsSqlServerEncryptionConfigurationSpecification? =
+        encryptionJson ?: encryption.asEncryption()
+
+    @JsonIgnore
+    @ConfigurationBuilder(configurationPrefix = "method")
+    var replicationMethod =
+        MsSqlServerMicronautPropertiesFriendlyMsSqlServerReplicationMethodConfiguration()
+    @JsonIgnore
+    var replicationMethodJson: MsSqlServerReplicationMethodConfigurationSpecification? = null
+    @JsonSetter("replication_method")
+    fun setReplicationMethodValue(value: MsSqlServerReplicationMethodConfigurationSpecification) {
+        replicationMethodJson = value
+    }
+    @JsonGetter("replication_method")
+    @JsonSchemaTitle("Update Method")
+    @JsonPropertyDescription(
+        "Configures how data is extracted from the database.",
+    )
+    @JsonSchemaInject(json = """{"order":8, "default":"CDC", "display_type": "radio"}""")
+    fun getReplicationMethodValue(): MsSqlServerReplicationMethodConfigurationSpecification? =
+        replicationMethodJson ?: replicationMethod.asReplicationMethod()
+}
diff --git a/airbyte-integrations/connectors/source-mssql/src/main/resources/application.yml b/airbyte-integrations/connectors/source-mssql/src/main/resources/application.yml
new file mode 100644
index 000000000000..b9bc7b0d3d24
--- /dev/null
+++ b/airbyte-integrations/connectors/source-mssql/src/main/resources/application.yml
@@ -0,0 +1,12 @@
+---
+airbyte:
+  connector:
+    extract:
+      jdbc:
+        mode: sequential
+        namespace-kind: SCHEMA
+    check:
+      jdbc:
+        queries:
+          - >-
+            SELECT 1 where 1 = 0;
diff --git a/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerContainerFactory.kt b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerContainerFactory.kt
new file mode 100644
index 000000000000..1b7e873e2f8f
--- /dev/null
+++ b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerContainerFactory.kt
@@ -0,0 +1,129 @@
+/* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
*/
+package io.airbyte.integrations.source.mssql
+
+import io.airbyte.cdk.jdbc.JdbcConnectionFactory
+import io.airbyte.cdk.testcontainers.TestContainerFactory
+import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerCursorBasedReplicationConfigurationSpecification
+import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerSourceConfigurationSpecification
+import java.sql.Statement
+import org.apache.commons.lang3.RandomStringUtils
+import org.testcontainers.containers.Container
+import org.testcontainers.containers.MSSQLServerContainer
+import org.testcontainers.containers.Network
+import org.testcontainers.utility.DockerImageName
+
+enum class MsSqlServerImage(val imageName: String) {
+    SQLSERVER_2022("mcr.microsoft.com/mssql/server:2022-latest")
+}
+
+class MsSqlServercontainer(val realContainer: MSSQLServerContainer<*>) {
+    val schemaName = "schema_" + RandomStringUtils.insecure().nextAlphabetic(16)
+}
+
+object MsSqlServerContainerFactory {
+    const val COMPATIBLE_NAME = "mcr.microsoft.com/mssql/server"
+
+    init {
+        TestContainerFactory.register(COMPATIBLE_NAME, ::MSSQLServerContainer)
+    }
+
+    sealed interface MysqlContainerModifier :
+        TestContainerFactory.ContainerModifier<MSSQLServerContainer<*>>
+
+    data object WithNetwork : MysqlContainerModifier {
+        override fun modify(container: MSSQLServerContainer<*>) {
+            container.withNetwork(Network.newNetwork())
+        }
+    }
+
+    data object WithCdcOff : MysqlContainerModifier {
+        override fun modify(container: MSSQLServerContainer<*>) {
+            // No-op: SQL Server keeps CDC off until sys.sp_cdc_enable_db is run for a database.
+        }
+    }
+
+    fun exclusive(
+        image: MsSqlServerImage,
+        vararg modifiers: MysqlContainerModifier,
+    ): MsSqlServercontainer {
+        val dockerImageName =
+            DockerImageName.parse(image.imageName).asCompatibleSubstituteFor(COMPATIBLE_NAME)
+        return MsSqlServercontainer(TestContainerFactory.exclusive(dockerImageName, *modifiers))
+    }
+
+    fun shared(
+        image: MsSqlServerImage,
+        vararg modifiers: MysqlContainerModifier,
+    ): MsSqlServercontainer {
+        val dockerImageName =
+            DockerImageName.parse(image.imageName).asCompatibleSubstituteFor(COMPATIBLE_NAME)
+        return MsSqlServercontainer(TestContainerFactory.shared(dockerImageName, *modifiers))
+    }
+
+    @JvmStatic
+    fun config(msSQLContainer: MsSqlServercontainer): MsSqlServerSourceConfigurationSpecification {
+        val schemaName = msSQLContainer.schemaName
+        val config =
+            MsSqlServerSourceConfigurationSpecification().apply {
+                host = msSQLContainer.realContainer.host
+                port =
+                    msSQLContainer.realContainer.getMappedPort(
+                        MSSQLServerContainer.MS_SQL_SERVER_PORT
+                    )
+                username = msSQLContainer.realContainer.username
+                password = msSQLContainer.realContainer.password
+                jdbcUrlParams = ""
+                database = "master"
+                schemas = arrayOf(schemaName)
+                replicationMethodJson =
+                    MsSqlServerCursorBasedReplicationConfigurationSpecification()
+            }
+        JdbcConnectionFactory(MsSqlServerSourceConfigurationFactory().make(config)).get().use {
+            connection ->
+            connection.isReadOnly = false
+            connection.createStatement().use { stmt: Statement ->
+                stmt.execute("CREATE SCHEMA $schemaName")
+            }
+            connection.createStatement().use { stmt: Statement ->
+                stmt.execute(
+                    "CREATE TABLE $schemaName.name_and_born(name VARCHAR(200), born DATETIMEOFFSET(7));"
+                )
+                stmt.execute(
+                    "CREATE TABLE $schemaName.id_name_and_born(id INTEGER PRIMARY KEY, name VARCHAR(200), born DATETIMEOFFSET(7));"
+                )
+            }
+            connection.createStatement().use { stmt: Statement ->
+                stmt.execute(
+                    "INSERT INTO $schemaName.name_and_born (name, born) VALUES ('foo', '2022-03-21 15:43:15.45'), ('bar', '2022-10-22 01:02:03.04')"
+                )
+                stmt.execute(
+                    "INSERT INTO $schemaName.id_name_and_born (id, name, born) VALUES (1, 'foo', '2022-03-21 15:43:15.45'), (2, 'bar', '2022-10-22 01:02:03.04')"
+                )
+            }
+        }
+        return config
+    }
+
+    @JvmStatic
+    fun cdcConfig(
+        msSQLContainer: MSSQLServerContainer<*>
+    ): MsSqlServerSourceConfigurationSpecification =
+        MsSqlServerSourceConfigurationSpecification().apply {
+            host = msSQLContainer.host
+            port = msSQLContainer.getMappedPort(MSSQLServerContainer.MS_SQL_SERVER_PORT)
+            username = msSQLContainer.username
+            password = msSQLContainer.password
+            jdbcUrlParams = ""
+            database = "dbo" // NOTE(review): "dbo" is a schema name; config() uses "master"
+        }
+
+    fun MSSQLServerContainer<*>.execAsRoot(sql: String) {
+        val cleanSql: String = sql.trim().removeSuffix(";") + ";"
+        // NOTE(review): the `mysql` CLI and root/test login look like MySQL leftovers; confirm.
+        val result: Container.ExecResult =
+            execInContainer("mysql", "-u", "root", "-ptest", "-e", cleanSql)
+        if (result.exitCode != 0) {
+            throw RuntimeException("Failed to execute query $cleanSql")
+        }
+    }
+}
diff --git a/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerCursorBasedIntegrationTest.kt b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerCursorBasedIntegrationTest.kt
new file mode 100644
index 000000000000..0bb1c4cd3216
--- /dev/null
+++ b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerCursorBasedIntegrationTest.kt
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.source.mssql
+
+import io.airbyte.cdk.StreamIdentifier
+import io.airbyte.cdk.command.CliRunner
+import io.airbyte.cdk.discover.DiscoveredStream
+import io.airbyte.cdk.discover.Field
+import io.airbyte.cdk.jdbc.IntFieldType
+import io.airbyte.cdk.jdbc.JdbcConnectionFactory
+import io.airbyte.cdk.jdbc.StringFieldType
+import io.airbyte.cdk.output.BufferingOutputConsumer
+import io.airbyte.cdk.util.Jsons
+import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerSourceConfigurationSpecification
+import io.airbyte.protocol.models.v0.AirbyteRecordMessage
+import io.airbyte.protocol.models.v0.AirbyteStateMessage
+import io.airbyte.protocol.models.v0.AirbyteStream
+import io.airbyte.protocol.models.v0.CatalogHelpers
+import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
+import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream
+import io.airbyte.protocol.models.v0.StreamDescriptor
+import io.airbyte.protocol.models.v0.SyncMode
+import io.github.oshai.kotlinlogging.KotlinLogging
+import java.sql.Connection
+import java.sql.Statement
+import kotlin.test.assertEquals
+import org.junit.jupiter.api.Assertions.assertEquals
+import org.junit.jupiter.api.BeforeAll
+import org.junit.jupiter.api.Test
+import org.junit.jupiter.api.Timeout
+
+class MsSqlServerCursorBasedIntegrationTest {
+    // NOTE(review): tests share one table; testCursorBasedRead inserts rows, so order matters.
+    @Test
+    fun testCursorBasedRead() {
+        val run1: BufferingOutputConsumer =
+            CliRunner.source("read", config, getConfiguredCatalog()).run()
+
+        val lastStateMessageFromRun1 = run1.states().last()
+        val lastStreamStateFromRun1 = lastStateMessageFromRun1.stream.streamState
+        log.info { "lastStreamStateFromRun1=$lastStreamStateFromRun1" }
+
+        assertEquals("20", lastStreamStateFromRun1.get("cursor").textValue())
+        assertEquals(2, lastStreamStateFromRun1.get("version").intValue())
+        assertEquals("cursor_based", lastStreamStateFromRun1.get("state_type").asText())
+        assertEquals(tableName, lastStreamStateFromRun1.get("stream_name").asText())
+        assertEquals(listOf("k"), lastStreamStateFromRun1.get("cursor_field").map { it.asText() })
+        assertEquals(
+            dbContainer.schemaName,
+            lastStreamStateFromRun1.get("stream_namespace").asText()
+        )
+        assertEquals(0, lastStreamStateFromRun1.get("cursor_record_count").asInt())
+
+        connectionFactory.get().use { connection: Connection ->
+            connection.isReadOnly = false
+            connection.createStatement().use { stmt: Statement ->
+                stmt.execute(
+                    "INSERT INTO ${dbContainer.schemaName}.$tableName (k, v) VALUES (3, 'baz-ignore')"
+                )
+                stmt.execute(
+                    "INSERT INTO ${dbContainer.schemaName}.$tableName (k, v) VALUES (13, 'baz-ignore')"
+                )
+                stmt.execute(
+                    "INSERT INTO ${dbContainer.schemaName}.$tableName (k, v) VALUES (30, 'baz')"
+                )
+            }
+        }
+        // Only k=30 lies above the saved cursor (20); the rows with k=3 and k=13 must be skipped.
+        val run2InputState: List<AirbyteStateMessage> = listOf(lastStateMessageFromRun1)
+        val run2: BufferingOutputConsumer =
+            CliRunner.source("read", config, getConfiguredCatalog(), run2InputState).run()
+        val recordMessageFromRun2: List<AirbyteRecordMessage> = run2.records()
+        assertEquals(1, recordMessageFromRun2.size)
+    }
+
+    @Test
+    fun testWithV1State() {
+        val state: AirbyteStateMessage = Jsons.readValue(V1_STATE, AirbyteStateMessage::class.java)
+        val run1: BufferingOutputConsumer =
+            CliRunner.source("read", config, getConfiguredCatalog(), listOf(state)).run()
+        val recordMessagesFromRun1: List<AirbyteRecordMessage> = run1.records()
+        assertEquals(
+            actual = recordMessagesFromRun1.size,
+            expected = 1,
+            message = recordMessagesFromRun1.toString()
+        )
+    }
+
+    @Test
+    fun testWithFullRefresh() {
+        val fullRefreshCatalog =
+            getConfiguredCatalog().apply { streams[0].syncMode = SyncMode.FULL_REFRESH }
+        val run1: BufferingOutputConsumer =
+            CliRunner.source("read", config, fullRefreshCatalog).run()
+        val recordMessageFromRun1: List<AirbyteRecordMessage> = run1.records()
+        assertEquals(3, recordMessageFromRun1.size, recordMessageFromRun1.toString())
+        val lastStateMessageFromRun1 = run1.states().last()
+
+        val run2: BufferingOutputConsumer =
+            CliRunner.source("read", config, fullRefreshCatalog, listOf(lastStateMessageFromRun1))
+                .run()
+        val recordMessageFromRun2: List<AirbyteRecordMessage> = run2.records()
+        assertEquals(0, recordMessageFromRun2.size)
+    }
+
+    companion object {
+        val log = KotlinLogging.logger {}
+        val dbContainer: MsSqlServercontainer =
+            MsSqlServerContainerFactory.shared(MsSqlServerImage.SQLSERVER_2022)
+
+        val config: MsSqlServerSourceConfigurationSpecification =
+            MsSqlServerContainerFactory.config(dbContainer)
+
+        val connectionFactory: JdbcConnectionFactory by lazy {
+            JdbcConnectionFactory(MsSqlServerSourceConfigurationFactory().make(config))
+        }
+
+        fun getConfiguredCatalog(): ConfiguredAirbyteCatalog {
+            val desc = StreamDescriptor().withName(tableName).withNamespace(dbContainer.schemaName)
+            val discoveredStream =
+                DiscoveredStream(
+                    id = StreamIdentifier.Companion.from(desc),
+                    columns = listOf(Field("k", IntFieldType), Field("v", StringFieldType)),
+                    primaryKeyColumnIDs = listOf(listOf("k")),
+                )
+            val stream: AirbyteStream = MsSqlServerStreamFactory().createGlobal(discoveredStream)
+            val configuredStream: ConfiguredAirbyteStream =
+                CatalogHelpers.toDefaultConfiguredStream(stream)
+                    .withSyncMode(SyncMode.INCREMENTAL)
+                    .withPrimaryKey(discoveredStream.primaryKeyColumnIDs)
+                    .withCursorField(listOf("k"))
+            return ConfiguredAirbyteCatalog().withStreams(listOf(configuredStream))
+        }
+
+        @JvmStatic
+        @BeforeAll
+        @Timeout(value = 300)
+        fun startAndProvisionTestContainer() {
+            provisionTestContainer(connectionFactory)
+        }
+
+        lateinit var tableName: String
+
+        fun provisionTestContainer(targetConnectionFactory: JdbcConnectionFactory) {
+            tableName = (1..8).map { ('a'..'z').random() }.joinToString("")
+
+            targetConnectionFactory.get().use { connection: Connection ->
+                connection.isReadOnly = false
+                connection.createStatement().use { stmt: Statement ->
+                    stmt.execute(
+                        "CREATE TABLE ${dbContainer.schemaName}.$tableName(k INT PRIMARY KEY, v VARCHAR(80))"
+                    )
+                }
+                connection.createStatement().use { stmt: Statement ->
+                    stmt.execute(
+                        "INSERT INTO ${dbContainer.schemaName}.$tableName (k, v) VALUES (5, 'abc'), (10, 'foo'), (20, 'bar')"
+                    )
+                }
+            }
+        }
+    }
+    val V1_STATE: String =
+        """
+      {
+        "type": "STREAM",
+        "stream": {
+          "stream_descriptor": {
+            "name": "${tableName}",
+            "namespace": "${dbContainer.schemaName}"
+          },
+          "stream_state": {
+            "cursor": "10",
+            "version": 2,
+            "state_type": "cursor_based",
+            "stream_name": "${tableName}",
+            "cursor_field": [
+              "k"
+            ],
+            "stream_namespace": "${dbContainer.schemaName}",
+            "cursor_record_count": 1
+          }
+        }
+      }
+    """
+}
diff --git a/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceConfigurationSpecificationTest.kt b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceConfigurationSpecificationTest.kt
new file mode 100644
index 000000000000..a2512aee63f7
--- /dev/null
+++ b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceConfigurationSpecificationTest.kt
@@ -0,0 +1,76 @@
+/* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
*/
+package io.airbyte.integrations.source.mssql
+
+import io.airbyte.cdk.ConfigErrorException
+import io.airbyte.cdk.command.ConfigurationSpecificationSupplier
+import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerEncryptionConfigurationSpecification
+import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerEncryptionDisabledConfigurationSpecification
+import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerSourceConfigurationSpecification
+import io.micronaut.context.annotation.Property
+import io.micronaut.context.env.Environment
+import io.micronaut.test.extensions.junit5.annotation.MicronautTest
+import jakarta.inject.Inject
+import org.junit.jupiter.api.Assertions
+import org.junit.jupiter.api.Test
+
+@MicronautTest(environments = [Environment.TEST], rebuildContext = true)
+class MysqlSourceConfigurationSpecificationTestTest {
+    @Inject
+    lateinit var supplier:
+        ConfigurationSpecificationSupplier<MsSqlServerSourceConfigurationSpecification>
+
+    @Test
+    fun testSchemaViolation() {
+        Assertions.assertThrows(ConfigErrorException::class.java, supplier::get)
+    }
+
+    @Test
+    @Property(name = "airbyte.connector.config.json", value = CONFIG_JSON)
+    fun testJson() {
+        val pojo: MsSqlServerSourceConfigurationSpecification = supplier.get()
+        Assertions.assertEquals("localhost", pojo.host)
+        Assertions.assertEquals(12345, pojo.port)
+        Assertions.assertEquals("FOO", pojo.username)
+        Assertions.assertEquals("BAR", pojo.password)
+        Assertions.assertEquals("SYSTEM", pojo.database)
+        val encryption: MsSqlServerEncryptionConfigurationSpecification? = pojo.getEncryptionValue()
+        Assertions.assertTrue(
+            encryption is MsSqlServerEncryptionDisabledConfigurationSpecification,
+            encryption!!::class.toString()
+        )
+        /*val tunnelMethod: SshTunnelMethodConfiguration? = pojo.getTunnelMethodValue()
+        Assertions.assertTrue(
+            tunnelMethod is SshPasswordAuthTunnelMethod,
+            tunnelMethod!!::class.toString(),
+        )
+        Assertions.assertEquals(60, pojo.checkpointTargetIntervalSeconds)
+        Assertions.assertEquals(2, pojo.concurrency)*/
+    }
+}
+// NOTE(review): ssl_mode / tunnel_method are not declared by the spec class (it has ssl_method).
+const val CONFIG_JSON: String =
+    """
+{
+  "host": "localhost",
+  "port": 12345,
+  "username": "FOO",
+  "password": "BAR",
+  "database": "SYSTEM",
+  "ssl_mode": {
+    "mode": "preferred"
+  },
+  "tunnel_method": {
+    "tunnel_method": "SSH_PASSWORD_AUTH",
+    "tunnel_host": "localhost",
+    "tunnel_port": 2222,
+    "tunnel_user": "sshuser",
+    "tunnel_user_password": "***"
+  },
+  "replication_method": {
+    "method": "STANDARD"
+  },
+  "checkpoint_target_interval_seconds": 60,
+  "jdbc_url_params": "theAnswerToLiveAndEverything=42&sessionVariables=max_execution_time=10000&foo=bar&",
+  "concurrency": 2
+}
+"""
diff --git a/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceConfigurationTest.kt b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceConfigurationTest.kt
new file mode 100644
index 000000000000..459a89b28b2c
--- /dev/null
+++ b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceConfigurationTest.kt
@@ -0,0 +1,155 @@
+/* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
*/
+package io.airbyte.integrations.source.mssql
+
+import io.airbyte.cdk.ConfigErrorException
+import io.airbyte.cdk.command.AIRBYTE_CLOUD_ENV
+import io.airbyte.cdk.command.ConfigurationSpecificationSupplier
+import io.airbyte.cdk.command.SourceConfigurationFactory
+import io.airbyte.cdk.ssh.SshNoTunnelMethod
+import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerSourceConfigurationSpecification
+import io.micronaut.context.annotation.Property
+import io.micronaut.context.env.Environment
+import io.micronaut.test.extensions.junit5.annotation.MicronautTest
+import jakarta.inject.Inject
+import org.junit.jupiter.api.Assertions
+import org.junit.jupiter.api.Test
+
+@MicronautTest(environments = [Environment.TEST, AIRBYTE_CLOUD_ENV], rebuildContext = true)
+class MysqlSourceConfigurationTest {
+    @Inject
+    lateinit var pojoSupplier:
+        ConfigurationSpecificationSupplier<MsSqlServerSourceConfigurationSpecification>
+
+    @Inject
+    lateinit var factory:
+        SourceConfigurationFactory<
+            MsSqlServerSourceConfigurationSpecification, MsSqlServerSourceConfiguration>
+
+    @Test
+    @Property(name = "airbyte.connector.config.host", value = "localhost")
+    @Property(name = "airbyte.connector.config.port", value = "12345")
+    @Property(name = "airbyte.connector.config.username", value = "FOO")
+    @Property(name = "airbyte.connector.config.password", value = "BAR")
+    @Property(name = "airbyte.connector.config.database", value = "SYSTEM")
+    @Property(name = "airbyte.connector.config.ssl_mode.mode", value = "required")
+    @Property(
+        name = "airbyte.connector.config.jdbc_url_params",
+        value = "theAnswerToLiveAndEverything=42&sessionVariables=max_execution_time=10000&foo=bar&"
+    )
+    fun testParseJdbcParameters() {
+        val pojo: MsSqlServerSourceConfigurationSpecification = pojoSupplier.get()
+
+        val config = factory.makeWithoutExceptionHandling(pojo)
+        // NOTE(review): "SYSTEM" is expected here but "dbo" in testParseConfigFromV1 - confirm.
+        Assertions.assertEquals("localhost", config.realHost)
+        Assertions.assertEquals(12345, config.realPort)
+        Assertions.assertEquals(setOf("SYSTEM"), config.namespaces)
+        Assertions.assertTrue(config.sshTunnel is SshNoTunnelMethod)
+
+        Assertions.assertEquals("FOO", config.jdbcProperties["user"])
+        Assertions.assertEquals("BAR", config.jdbcProperties["password"])
+
+        // NOTE(review): useCursorFetch/sessionVariables look like MySQL settings; confirm for MSSQL.
+        Assertions.assertEquals("true", config.jdbcProperties["useCursorFetch"])
+        Assertions.assertEquals("autocommit=0", config.jdbcProperties["sessionVariables"])
+
+        Assertions.assertEquals("42", config.jdbcProperties["theAnswerToLiveAndEverything"])
+        Assertions.assertEquals("bar", config.jdbcProperties["foo"])
+    }
+
+    @Test
+    @Property(name = "airbyte.connector.config.host", value = "localhost")
+    @Property(name = "airbyte.connector.config.port", value = "12345")
+    @Property(name = "airbyte.connector.config.username", value = "FOO")
+    @Property(name = "airbyte.connector.config.password", value = "BAR")
+    @Property(name = "airbyte.connector.config.database", value = "SYSTEM")
+    fun testAirbyteCloudDeployment() {
+        val pojo: MsSqlServerSourceConfigurationSpecification = pojoSupplier.get()
+        Assertions.assertThrows(ConfigErrorException::class.java) {
+            factory.makeWithoutExceptionHandling(pojo)
+        }
+    }
+
+    @Test
+    @Property(name = "airbyte.connector.config.json", value = CONFIG_V1)
+    fun testParseConfigFromV1() {
+        val pojo: MsSqlServerSourceConfigurationSpecification = pojoSupplier.get()
+
+        val config = factory.makeWithoutExceptionHandling(pojo)
+
+        Assertions.assertEquals("localhost", config.realHost)
+        Assertions.assertEquals(12345, config.realPort)
+        Assertions.assertEquals(setOf("dbo"), config.namespaces)
+
+        Assertions.assertEquals("FOO", config.jdbcProperties["user"])
+        Assertions.assertEquals("BAR", config.jdbcProperties["password"])
+        // Assertions.assertEquals(config.jdbcProperties["sslMode"], "required")
+        Assertions.assertTrue(
+            config.incrementalReplicationConfiguration
+                is MsSqlServerCdcIncrementalReplicationConfiguration
+        )
+
+        val cdcCursor =
+            config.incrementalReplicationConfiguration
+                as MsSqlServerCdcIncrementalReplicationConfiguration
+
+        Assertions.assertEquals(301, cdcCursor.initialWaitingSeconds)
+        /*Assertions.assertEquals(cdcCursor.initialLoadTimeout, Duration.ofHours(9))
+        Assertions.assertEquals(
+            cdcCursor.invalidCdcCursorPositionBehavior,
+            InvalidCdcCursorPositionBehavior.RESET_SYNC
+        )*/
+
+        Assertions.assertTrue(config.sshTunnel is SshNoTunnelMethod)
+    }
+}
+
+const val CONFIG: String =
+    """
+{
+  "host": "localhost",
+  "port": 12345,
+  "username": "FOO",
+  "password": "BAR",
+  "database": "SYSTEM",
+  "ssl_mode": {
+    "mode": "preferred"
+  },
+  "tunnel_method": {
+    "tunnel_method": "SSH_PASSWORD_AUTH",
+    "tunnel_host": "localhost",
+    "tunnel_port": 2222,
+    "tunnel_user": "sshuser",
+    "tunnel_user_password": "***"
+  },
+  "replication_method": {
+    "method": "STANDARD"
+  },
+  "checkpoint_target_interval_seconds": 60,
+  "jdbc_url_params": "theAnswerToLiveAndEverything=42&sessionVariables=max_execution_time=10000&foo=bar&",
+  "concurrency": 2
+}
+"""
+
+const val CONFIG_V1: String =
+    """
+{
+  "host": "localhost",
+  "port": 12345,
+  "database": "SYSTEM",
+  "password": "BAR",
+  "ssl_mode": {
+    "mode": "required"
+  },
+  "username": "FOO",
+  "tunnel_method": {
+    "tunnel_method": "NO_TUNNEL"
+  },
+  "replication_method": {
+    "method": "CDC",
+    "initial_waiting_seconds": 301,
+    "initial_load_timeout_hours": 9,
+    "invalid_cdc_cursor_position_behavior": "Re-sync data"
+  }
+}
+"""
diff --git a/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceDatatypeIntegrationTest.kt b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceDatatypeIntegrationTest.kt
new file mode 100644
index 000000000000..54f996f9bdca
--- /dev/null
+++ b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceDatatypeIntegrationTest.kt
@@ -0,0 +1,466
@@ +/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */ +package io.airbyte.integrations.source.mssql + +import com.fasterxml.jackson.databind.JsonNode +import io.airbyte.cdk.ClockFactory +import io.airbyte.cdk.command.CliRunner +import io.airbyte.cdk.data.AirbyteSchemaType +import io.airbyte.cdk.data.LeafAirbyteSchemaType +import io.airbyte.cdk.jdbc.JdbcConnectionFactory +import io.airbyte.cdk.output.BufferingOutputConsumer +import io.airbyte.cdk.util.Jsons +import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerSourceConfigurationSpecification +import io.airbyte.protocol.models.v0.AirbyteMessage +import io.airbyte.protocol.models.v0.AirbyteRecordMessage +import io.airbyte.protocol.models.v0.AirbyteStream +import io.airbyte.protocol.models.v0.AirbyteTraceMessage +import io.airbyte.protocol.models.v0.CatalogHelpers +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog +import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream +import io.airbyte.protocol.models.v0.SyncMode +import io.github.oshai.kotlinlogging.KotlinLogging +import java.sql.Connection +import org.junit.jupiter.api.Assertions +import org.junit.jupiter.api.BeforeAll +import org.junit.jupiter.api.DynamicContainer +import org.junit.jupiter.api.DynamicNode +import org.junit.jupiter.api.DynamicTest +import org.junit.jupiter.api.TestFactory +import org.junit.jupiter.api.Timeout + +private val log = KotlinLogging.logger {} + +class MsSqlServerSourceDatatypeIntegrationTest { + @TestFactory + @Timeout(300) + fun syncTests(): Iterable { + val discover: DynamicNode = + DynamicTest.dynamicTest("discover") { + Assertions.assertFalse(LazyValues.actualStreams.isEmpty()) + } + val read: DynamicNode = + DynamicTest.dynamicTest("read") { + Assertions.assertFalse(LazyValues.actualReads.isEmpty()) + } + val cases: List = + allStreamNamesAndRecordData.keys.map { streamName: String -> + DynamicContainer.dynamicContainer( + streamName, + listOf( + DynamicTest.dynamicTest("discover") { 
discover(streamName) }, + DynamicTest.dynamicTest("records") { records(streamName) }, + ), + ) + } + return listOf(discover, read) + cases + } + + object LazyValues { + val actualStreams: Map by lazy { + val output: BufferingOutputConsumer = CliRunner.source("discover", config()).run() + output.catalogs().firstOrNull()?.streams?.filterNotNull()?.associateBy { it.name } + ?: mapOf() + } + + val configuredCatalog: ConfiguredAirbyteCatalog by lazy { + val configuredStreams: List = + allStreamNamesAndRecordData.keys + .mapNotNull { actualStreams[it] } + .map(CatalogHelpers::toDefaultConfiguredStream) + for (configuredStream in configuredStreams) { + if (configuredStream.stream.supportedSyncModes.contains(SyncMode.INCREMENTAL)) { + configuredStream.syncMode = SyncMode.INCREMENTAL + } + } + ConfiguredAirbyteCatalog().withStreams(configuredStreams) + } + + val allReadMessages: List by lazy { + CliRunner.source("read", config(), configuredCatalog).run().messages() + } + + val actualReads: Map by lazy { + val result: Map = + allStreamNamesAndRecordData.keys.associateWith { + BufferingOutputConsumer(ClockFactory().fixed()) + } + for (msg in allReadMessages) { + result[streamName(msg) ?: continue]?.accept(msg) + } + result + } + + fun streamName(msg: AirbyteMessage): String? = + when (msg.type) { + AirbyteMessage.Type.RECORD -> msg.record?.stream + AirbyteMessage.Type.STATE -> msg.state?.stream?.streamDescriptor?.name + AirbyteMessage.Type.TRACE -> + when (msg.trace?.type) { + AirbyteTraceMessage.Type.ERROR -> msg.trace?.error?.streamDescriptor?.name + AirbyteTraceMessage.Type.ESTIMATE -> msg.trace?.estimate?.name + AirbyteTraceMessage.Type.STREAM_STATUS -> + msg.trace?.streamStatus?.streamDescriptor?.name + AirbyteTraceMessage.Type.ANALYTICS -> null + null -> null + } + else -> null + } + } + + private fun discover(streamName: String) { + val actualStream: AirbyteStream? 
= LazyValues.actualStreams[streamName] + log.info { "discover result: ${LazyValues.actualStreams}" } + log.info { "streamName: $streamName" } + Assertions.assertNotNull(actualStream) + log.info { + "test case $streamName: discovered stream ${ + Jsons.valueToTree( + actualStream, + ) + }" + } + val testCase: TestCase = + testCases.find { it.streamNamesToRecordData.keys.contains(streamName) }!! + val isIncrementalSupported: Boolean = + actualStream!!.supportedSyncModes.contains(SyncMode.INCREMENTAL) + val jsonSchema: JsonNode = actualStream.jsonSchema?.get("properties")!! + if (streamName == testCase.tableName) { + val actualSchema: JsonNode = jsonSchema[testCase.columnName] + Assertions.assertNotNull(actualSchema) + val expectedSchema: JsonNode = testCase.airbyteSchemaType.asJsonSchema() + Assertions.assertEquals(expectedSchema, actualSchema) + if (testCase.cursor) { + Assertions.assertTrue(isIncrementalSupported) + } else { + Assertions.assertFalse(isIncrementalSupported) + } + } + } + + private fun records(streamName: String) { + val actualRead: BufferingOutputConsumer? = LazyValues.actualReads[streamName] + Assertions.assertNotNull(actualRead) + + fun sortedRecordData(data: List): JsonNode = + Jsons.createArrayNode().apply { addAll(data.sortedBy { it.toString() }) } + + val actualRecords: List = actualRead?.records() ?: listOf() + + val actual: JsonNode = sortedRecordData(actualRecords.mapNotNull { it.data }) + log.info { "test case $streamName: emitted records $actual" } + val expected: JsonNode = sortedRecordData(allStreamNamesAndRecordData[streamName]!!) 
+ + Assertions.assertEquals(expected, actual) + } + + companion object { + lateinit var dbContainer: MsSqlServercontainer + + fun config(): MsSqlServerSourceConfigurationSpecification = + MsSqlServerContainerFactory.config(dbContainer) + + val connectionFactory: JdbcConnectionFactory by lazy { + JdbcConnectionFactory(MsSqlServerSourceConfigurationFactory().make(config())) + } + + val bitValues = + mapOf( + "b'1'" to "true", + "b'0'" to "false", + ) + + val longBitValues = + mapOf( + "b'10101010'" to """-86""", + ) + + val stringValues = + mapOf( + "'abcdef'" to """"abcdef"""", + "'ABCD'" to """"ABCD"""", + "'OXBEEF'" to """"OXBEEF"""", + ) + + val jsonValues = mapOf("""'{"col1": "v1"}'""" to """"{\"col1\": \"v1\"}"""") + + val yearValues = + mapOf( + "1992" to """1992""", + "2002" to """2002""", + "70" to """1970""", + ) + + val decimalValues = + mapOf( + "0.2" to """0.2""", + ) + + val zeroPrecisionDecimalValues = + mapOf( + "2" to """2""", + ) + + val tinyintValues = + mapOf( + "10" to "10", + "4" to "4", + "2" to "2", + ) + + val intValues = + mapOf( + "10" to "10", + "100000000" to "100000000", + "200000000" to "200000000", + ) + + val dateValues = + mapOf( + "'2022-01-01'" to """"2022-01-01"""", + ) + + val timeValues = + mapOf( + "'14:30:00'" to """"14:30:00.000000"""", + ) + + val dateTimeValues = + mapOf( + "'2024-09-13 14:30:00'" to """"2024-09-13T14:30:00.000000"""", + "'2024-09-13T14:40:00+00:00'" to """"2024-09-13T14:40:00.000000"""" + ) + + val timestampValues = + mapOf( + "'2024-09-12 14:30:00'" to """"2024-09-12T14:30:00.000000Z"""", + "CONVERT_TZ('2024-09-12 14:30:00', 'America/Los_Angeles', 'UTC')" to + """"2024-09-12T21:30:00.000000Z"""", + ) + + val booleanValues = + mapOf( + "TRUE" to "true", + "FALSE" to "false", + ) + + val enumValues = + mapOf( + "'a'" to """"a"""", + "'b'" to """"b"""", + "'c'" to """"c"""", + ) + + // Encoded into base64 + val binaryValues = + mapOf( + "X'89504E470D0A1A0A0000000D49484452'" to 
""""iVBORw0KGgoAAAANSUhEUg=="""", + ) + + val testCases: List = + listOf( + TestCase( + "BOOLEAN", + booleanValues, + airbyteSchemaType = LeafAirbyteSchemaType.BOOLEAN, + cursor = false + ), + TestCase( + "VARCHAR(10)", + stringValues, + airbyteSchemaType = LeafAirbyteSchemaType.STRING + ), + TestCase( + "DECIMAL(10,2)", + decimalValues, + airbyteSchemaType = LeafAirbyteSchemaType.NUMBER + ), + TestCase( + "DECIMAL(10,2) UNSIGNED", + decimalValues, + airbyteSchemaType = LeafAirbyteSchemaType.NUMBER + ), + TestCase( + "DECIMAL UNSIGNED", + zeroPrecisionDecimalValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER + ), + TestCase("FLOAT", decimalValues, airbyteSchemaType = LeafAirbyteSchemaType.NUMBER), + TestCase( + "FLOAT(7,4)", + decimalValues, + airbyteSchemaType = LeafAirbyteSchemaType.NUMBER + ), + TestCase( + "FLOAT(53,8)", + decimalValues, + airbyteSchemaType = LeafAirbyteSchemaType.NUMBER + ), + TestCase("DOUBLE", decimalValues, airbyteSchemaType = LeafAirbyteSchemaType.NUMBER), + TestCase( + "DOUBLE UNSIGNED", + decimalValues, + airbyteSchemaType = LeafAirbyteSchemaType.NUMBER + ), + TestCase( + "TINYINT", + tinyintValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER + ), + TestCase( + "TINYINT UNSIGNED", + tinyintValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER + ), + TestCase( + "SMALLINT", + tinyintValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER + ), + TestCase( + "MEDIUMINT", + tinyintValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER + ), + TestCase("BIGINT", intValues, airbyteSchemaType = LeafAirbyteSchemaType.INTEGER), + TestCase( + "SMALLINT UNSIGNED", + tinyintValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER + ), + TestCase( + "MEDIUMINT UNSIGNED", + tinyintValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER + ), + TestCase( + "BIGINT UNSIGNED", + intValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER + ), + TestCase("INT", intValues, airbyteSchemaType = 
LeafAirbyteSchemaType.INTEGER), + TestCase( + "INT UNSIGNED", + intValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER + ), + TestCase("DATE", dateValues, airbyteSchemaType = LeafAirbyteSchemaType.DATE), + TestCase( + "TIMESTAMP", + timestampValues, + airbyteSchemaType = LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE + ), + TestCase( + "DATETIME", + dateTimeValues, + airbyteSchemaType = LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE + ), + TestCase( + "TIME", + timeValues, + airbyteSchemaType = LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE + ), + TestCase("YEAR", yearValues, airbyteSchemaType = LeafAirbyteSchemaType.INTEGER), + TestCase( + "VARBINARY(255)", + binaryValues, + airbyteSchemaType = LeafAirbyteSchemaType.BINARY, + cursor = true, + noPK = false + ), + TestCase( + "BIT", + bitValues, + airbyteSchemaType = LeafAirbyteSchemaType.BOOLEAN, + cursor = false + ), + TestCase( + "BIT(8)", + longBitValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER + ), + TestCase( + "JSON", + jsonValues, + airbyteSchemaType = LeafAirbyteSchemaType.STRING, + noPK = true + ), + TestCase( + "ENUM('a', 'b', 'c')", + enumValues, + airbyteSchemaType = LeafAirbyteSchemaType.STRING, + noPK = true + ), + ) + + val allStreamNamesAndRecordData: Map> = + testCases.flatMap { it.streamNamesToRecordData.toList() }.toMap() + + @JvmStatic + @BeforeAll + @Timeout(value = 300) + fun startAndProvisionTestContainer() { + dbContainer = + MsSqlServerContainerFactory.exclusive( + MsSqlServerImage.SQLSERVER_2022, + MsSqlServerContainerFactory.WithNetwork, + ) + connectionFactory + .get() + .also { it.isReadOnly = false } + .use { connection: Connection -> + for (case in testCases) { + for (sql in case.sqlStatements) { + log.info { "test case ${case.id}: executing $sql" } + connection.createStatement().use { stmt -> stmt.execute(sql) } + } + } + } + } + } + + data class TestCase( + val sqlType: String, + val sqlToAirbyte: Map, + val airbyteSchemaType: AirbyteSchemaType = 
LeafAirbyteSchemaType.STRING, + val cursor: Boolean = true, + val noPK: Boolean = false, + val customDDL: List? = null, + ) { + val id: String + get() = + sqlType + .replace("[^a-zA-Z0-9]".toRegex(), " ") + .trim() + .replace(" +".toRegex(), "_") + .lowercase() + + val tableName: String + get() = "tbl_$id" + + val columnName: String + get() = "col_$id" + + val sqlStatements: List + get() { + val ddl: List = + listOf( + "CREATE DATABASE IF NOT EXISTS test", + "USE test", + "CREATE TABLE IF NOT EXISTS $tableName " + + "($columnName $sqlType ${if (noPK) "" else "PRIMARY KEY"})", + "TRUNCATE TABLE $tableName", + ) + val dml: List = + sqlToAirbyte.keys.map { "INSERT INTO $tableName ($columnName) VALUES ($it)" } + + return ddl + dml + } + + val streamNamesToRecordData: Map> + get() { + val recordData: List = + sqlToAirbyte.values.map { Jsons.readTree("""{"${columnName}":$it}""") } + return mapOf(tableName to recordData) + } + } +} diff --git a/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceSelectQueryGeneratorTest.kt b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceSelectQueryGeneratorTest.kt new file mode 100644 index 000000000000..3f02b4192226 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSourceSelectQueryGeneratorTest.kt @@ -0,0 +1,140 @@ +/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
*/ +package io.airbyte.integrations.source.mssql + +import com.fasterxml.jackson.databind.JsonNode +import io.airbyte.cdk.discover.Field +import io.airbyte.cdk.jdbc.DoubleFieldType +import io.airbyte.cdk.jdbc.IntFieldType +import io.airbyte.cdk.jdbc.LosslessJdbcFieldType +import io.airbyte.cdk.jdbc.OffsetDateTimeFieldType +import io.airbyte.cdk.jdbc.StringFieldType +import io.airbyte.cdk.read.And +import io.airbyte.cdk.read.Equal +import io.airbyte.cdk.read.From +import io.airbyte.cdk.read.Greater +import io.airbyte.cdk.read.LesserOrEqual +import io.airbyte.cdk.read.Limit +import io.airbyte.cdk.read.Or +import io.airbyte.cdk.read.OrderBy +import io.airbyte.cdk.read.SelectColumnMaxValue +import io.airbyte.cdk.read.SelectColumns +import io.airbyte.cdk.read.SelectQuery +import io.airbyte.cdk.read.SelectQuerySpec +import io.airbyte.cdk.read.Where +import io.airbyte.cdk.read.optimize +import io.airbyte.cdk.util.Jsons +import org.junit.jupiter.api.Assertions +import org.junit.jupiter.api.Test + +class MsSqlServerSourceSelectQueryGeneratorTest { + @Test + fun testSelectLimit0() { + SelectQuerySpec( + SelectColumns( + listOf( + Field("k", IntFieldType), + Field("v", StringFieldType), + ), + ), + From("TBL", "SC"), + limit = Limit(0), + ) + .assertSqlEquals("""SELECT TOP 0 k, v FROM SC.TBL""") + } + + @Test + fun testSelectMaxCursor() { + SelectQuerySpec( + SelectColumnMaxValue(Field("ts", OffsetDateTimeFieldType)), + From("TBL", "SC"), + ) + .assertSqlEquals("""SELECT MAX(ts) FROM SC.TBL""") + } + + @Test + fun testSelectForNonResumableInitialSync() { + SelectQuerySpec( + SelectColumns( + listOf( + Field("k", IntFieldType), + Field("v", StringFieldType), + ), + ), + From("TBL", "SC"), + ) + .assertSqlEquals("""SELECT k, v FROM SC.TBL""") + } + + @Test + fun testSelectForResumableInitialSync() { + val k1 = Field("k1", IntFieldType) + val v1 = Jsons.numberNode(10) + val k2 = Field("k2", IntFieldType) + val v2 = Jsons.numberNode(20) + val k3 = Field("k3", IntFieldType) + val 
v3 = Jsons.numberNode(30) + SelectQuerySpec( + SelectColumns(listOf(k1, k2, k3, Field("msg", StringFieldType))), + From("TBL", "SC"), + Where( + Or( + listOf( + And(listOf(Greater(k1, v1))), + And(listOf(Equal(k1, v1), Greater(k2, v2))), + And(listOf(Equal(k1, v1), Equal(k2, v2), Greater(k3, v3))), + ), + ), + ), + OrderBy(listOf(k1, k2, k3)), + Limit(1000), + ) + .assertSqlEquals( + """SELECT TOP 1000 k1, k2, k3, msg FROM """ + + """SC.TBL WHERE (k1 > ?) OR """ + + """((k1 = ?) AND (k2 > ?)) OR """ + + """((k1 = ?) AND (k2 = ?) AND (k3 > ?)) """ + + """ORDER BY k1, k2, k3""", + v1 to IntFieldType, + v1 to IntFieldType, + v2 to IntFieldType, + v1 to IntFieldType, + v2 to IntFieldType, + v3 to IntFieldType, + ) + } + + @Test + fun testSelectForCursorBasedIncrementalSync() { + val c = Field("c", DoubleFieldType) + val lb = Jsons.numberNode(0.5) + val ub = Jsons.numberNode(0.5) + SelectQuerySpec( + SelectColumns(listOf(Field("msg", StringFieldType), c)), + From("TBL", "SC"), + Where(And(listOf(Greater(c, lb), LesserOrEqual(c, ub)))), + OrderBy(listOf(c)), + Limit(1000), + ) + .assertSqlEquals( + """SELECT TOP 1000 msg, c FROM """ + + """SC.TBL """ + + """WHERE (c > ?) AND (c <= ?) 
ORDER BY c""", + lb to DoubleFieldType, + ub to DoubleFieldType, + ) + } + + private fun SelectQuerySpec.assertSqlEquals( + sql: String, + vararg bindings: Pair>, + ) { + val expected = + SelectQuery( + sql, + select.columns, + bindings.map { SelectQuery.Binding(it.first, it.second) }, + ) + val actual: SelectQuery = MsSqlServerSelectQueryGenerator().generate(this.optimize()) + Assertions.assertEquals(expected, actual) + } +} diff --git a/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSpecIntegrationTest.kt b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSpecIntegrationTest.kt new file mode 100644 index 000000000000..9ded4684abc6 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MsSqlServerSpecIntegrationTest.kt @@ -0,0 +1,271 @@ +/* + * Copyright (c) 2024 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql + +import com.fasterxml.jackson.databind.ObjectMapper +import io.airbyte.cdk.command.CliRunner +import io.airbyte.cdk.command.SyncsTestFixture +import io.airbyte.cdk.output.BufferingOutputConsumer +import io.airbyte.cdk.util.Jsons +import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerSourceConfigurationSpecification +import io.airbyte.protocol.models.Field +import io.airbyte.protocol.models.JsonSchemaType +import io.airbyte.protocol.models.v0.* +import java.nio.file.Files +import java.nio.file.Paths +import org.junit.jupiter.api.Assertions +import org.junit.jupiter.api.Test + +class MsSqlServerSpecIntegrationTest { + @Test + fun testSpec() { + SyncsTestFixture.testSpec("expected_spec.json") + } + + @Test + fun testCheck() { + val it = MsSqlServerContainerFactory.shared(MsSqlServerImage.SQLSERVER_2022) + SyncsTestFixture.testCheck(MsSqlServerContainerFactory.config(it)) + } + + @Test + fun testDiscover() { + val container = 
MsSqlServerContainerFactory.shared(MsSqlServerImage.SQLSERVER_2022) + val config = MsSqlServerContainerFactory.config(container) + val discoverOutput: BufferingOutputConsumer = CliRunner.source("discover", config).run() + Assertions.assertEquals( + listOf( + AirbyteCatalog() + .withStreams( + listOf( + AirbyteStream() + .withName("id_name_and_born") + .withJsonSchema( + Jsons.readTree( + """{"type":"object","properties":{"born":{"type":"string"},"name":{"type":"string"},"id":{"type":"number","airbyte_type":"integer"}}}""" + ) + ) + .withSupportedSyncModes( + listOf(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL) + ) + .withSourceDefinedCursor(false) + .withNamespace(config.schemas!![0]) + .withSourceDefinedPrimaryKey(listOf(listOf("id"))) + .withIsResumable(true), + AirbyteStream() + .withName("name_and_born") + .withJsonSchema( + Jsons.readTree( + """{"type":"object","properties":{"born":{"type":"string"},"name":{"type":"string"}}}""" + ) + ) + .withSupportedSyncModes( + listOf(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL) + ) + .withSourceDefinedCursor(false) + .withNamespace(config.schemas!![0]) + ) + ) + ), + discoverOutput.catalogs() + ) + } + + @Test + fun testSync() { + val container = MsSqlServerContainerFactory.shared(MsSqlServerImage.SQLSERVER_2022) + val config = MsSqlServerContainerFactory.config(container) + val configuredCatalog = + ConfiguredAirbyteCatalog() + .withStreams( + listOf( + ConfiguredAirbyteStream() + .withSyncMode(SyncMode.INCREMENTAL) + .withDestinationSyncMode(DestinationSyncMode.APPEND) + .withStream( + CatalogHelpers.createAirbyteStream( + "name_and_born", + config.schemas!![0], + Field.of("name", JsonSchemaType.STRING), + Field.of("born", JsonSchemaType.STRING) + ) + .withSupportedSyncModes( + listOf(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL) + ) + ), + ConfiguredAirbyteStream() + .withSyncMode(SyncMode.INCREMENTAL) + .withCursorField(listOf("id")) + .withDestinationSyncMode(DestinationSyncMode.APPEND) + .withStream( + 
CatalogHelpers.createAirbyteStream( + "id_name_and_born", + config.schemas!![0], + Field.of("id", JsonSchemaType.INTEGER), + Field.of("name", JsonSchemaType.STRING), + Field.of("born", JsonSchemaType.STRING) + ) + .withSupportedSyncModes( + listOf(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL) + ) + ) + ) + ) + + val readOutput: BufferingOutputConsumer = + CliRunner.source("read", config, configuredCatalog, listOf()).run() + // println("SGXX + // records=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.records())}") + println( + "SGXX: specs=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.specs())}" + ) + println( + "SGXX logs=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.logs())}" + ) + println( + "SGXX states=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.states())}" + ) + println( + "SGXX statuses=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.statuses())}" + ) + // println("SGXX + // messages=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.messages())}") + println( + "SGXX traces=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.traces())}" + ) + // println("SGXX + // readOutput=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput)}") + + } + + @Test + fun testSync2() { + // val container = + // MsSqlServerContainerFactory.shared(MsSqlServerContainerFactory.SQLSERVER_2022) + val configString = Files.readString(Paths.get("secrets/config-cdc.json")) + val config = + Jsons.readValue( + configString, + MsSqlServerSourceConfigurationSpecification::class.java, + ) + println("SGX config=$configString") + + val catalog = + SyncsTestFixture.configuredCatalogFromResource("catalog-cdc-single-stream.json") + CliRunner.source("discover", config).run() + println( + "SGX 
catalogString=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(catalog)}" + ) + + val readOutput: BufferingOutputConsumer = + CliRunner.source("read", config, catalog, listOf()).run() + // println("SGXX + // records=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.records())}") + println( + "SGXX: specs=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.specs())}" + ) + println( + "SGXX logs=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.logs())}" + ) + println( + "SGXX states=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.states())}" + ) + println( + "SGXX statuses=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.statuses())}" + ) + // println("SGXX + // messages=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.messages())}") + println( + "SGXX traces=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.traces())}" + ) + // println("SGXX + // readOutput=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput)}") + + } + + @Test + fun testSyncWithAlwaysNullCursor() { + // val container = + // MsSqlServerContainerFactory.shared(MsSqlServerContainerFactory.SQLSERVER_2022) + val configString = Files.readString(Paths.get("secrets/config-cdc.json")) + val config = + Jsons.readValue( + configString, + MsSqlServerSourceConfigurationSpecification::class.java, + ) + println("SGX config=$configString") + + val catalog = SyncsTestFixture.configuredCatalogFromResource("catalog-cdc-dbo-users.json") + CliRunner.source("discover", config).run() + println( + "SGX catalogString=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(catalog)}" + ) + + val readOutput: BufferingOutputConsumer = + CliRunner.source("read", config, catalog, listOf()).run() + // println("SGXX + // 
records=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.records())}") + println( + "SGXX: specs=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.specs())}" + ) + println( + "SGXX logs=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.logs())}" + ) + println( + "SGXX states=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.states())}" + ) + println( + "SGXX statuses=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.statuses())}" + ) + // println("SGXX + // messages=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.messages())}") + println( + "SGXX traces=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.traces())}" + ) + // println("SGXX + // readOutput=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput)}") + + } + + @Test + fun testSyncEmptyTable() { + val configString = Files.readString(Paths.get("secrets/config-cdc.json")) + val config = + Jsons.readValue( + configString, + MsSqlServerSourceConfigurationSpecification::class.java, + ) + val configuredCatalog = + ConfiguredAirbyteCatalog() + .withStreams( + listOf( + ConfiguredAirbyteStream() + .withSyncMode(SyncMode.INCREMENTAL) + .withDestinationSyncMode(DestinationSyncMode.APPEND) + .withCursorField(listOf("born")) + .withPrimaryKey(listOf(listOf("born"))) + .withStream( + CatalogHelpers.createAirbyteStream( + "name_born", + "dbo", + Field.of("name", JsonSchemaType.STRING), + Field.of("born", JsonSchemaType.STRING) + ) + .withSupportedSyncModes(listOf(SyncMode.INCREMENTAL)) + ) + ) + ) + CliRunner.source("discover", config).run() + println( + "SGX catalogString=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(configuredCatalog)}" + ) + + val readOutput: BufferingOutputConsumer = + CliRunner.source("read", config, configuredCatalog, listOf()).run() + 
println( + "SGXX messages=${ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(readOutput.messages())}" + ) + } +} diff --git a/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlCdcDatatypeIntegrationTest.kt b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlCdcDatatypeIntegrationTest.kt new file mode 100644 index 000000000000..bd35249cab45 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlCdcDatatypeIntegrationTest.kt @@ -0,0 +1,441 @@ +/* + * Copyright (c) 2024 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql + +import io.github.oshai.kotlinlogging.KotlinLogging + +private val log = KotlinLogging.logger {} + +class MysqlCdcDatatypeIntegrationTest { + /* + @TestFactory + @Timeout(300) + fun syncTests(): Iterable { + val read: DynamicNode = + DynamicTest.dynamicTest("read") { + Assertions.assertFalse(LazyValues.actualReads.isEmpty()) + } + val cases: List = + allStreamNamesAndRecordData.keys.map { streamName: String -> + DynamicContainer.dynamicContainer( + streamName, + listOf( + DynamicTest.dynamicTest("records") { records(streamName) }, + ), + ) + } + return listOf(read) + cases + } + + object LazyValues { + val actualStreams: Map by lazy { + val output: BufferingOutputConsumer = CliRunner.source("discover", config()).run() + output.catalogs().firstOrNull()?.streams?.filterNotNull()?.associateBy { it.name } + ?: mapOf() + } + + val configuredCatalog: ConfiguredAirbyteCatalog by lazy { + val configuredStreams: List = + allStreamNamesAndRecordData.keys + .mapNotNull { actualStreams[it] } + .map { + CatalogHelpers.toDefaultConfiguredStream(it) + .withCursorField( + listOf(MysqlCdcMetaFields.CDC_CURSOR.id), + ) + } + + for (configuredStream in configuredStreams) { + if 
(configuredStream.stream.supportedSyncModes.contains(SyncMode.INCREMENTAL)) { + configuredStream.syncMode = SyncMode.INCREMENTAL + } + } + ConfiguredAirbyteCatalog().withStreams(configuredStreams) + } + + val allReadMessages: List by lazy { + // only get messages from the 2nd run + val lastStateMessageFromFirstRun = + CliRunner.source("read", config(), configuredCatalog).run().states().last() + + // insert + connectionFactory + .get() + .also { it.isReadOnly = false } + .use { connection: Connection -> + for (case in testCases) { + for (sql in case.sqlInsertStatements) { + log.info { "test case ${case.id}: executing $sql" } + connection.createStatement().use { stmt -> stmt.execute(sql) } + } + } + } + + // Run it in dbz mode on 2nd time: + CliRunner.source( + "read", + config(), + configuredCatalog, + listOf(lastStateMessageFromFirstRun) + ) + .run() + .messages() + } + + val actualReads: Map by lazy { + val result: Map = + allStreamNamesAndRecordData.keys.associateWith { + BufferingOutputConsumer(ClockFactory().fixed()) + } + for (msg in allReadMessages) { + result[streamName(msg) ?: continue]?.accept(msg) + } + result + } + + fun streamName(msg: AirbyteMessage): String? = + when (msg.type) { + AirbyteMessage.Type.RECORD -> msg.record?.stream + else -> null + } + } + + private fun records(streamName: String) { + val actualRead: BufferingOutputConsumer? 
= LazyValues.actualReads[streamName] + Assertions.assertNotNull(actualRead) + + fun sortedRecordData(data: List): JsonNode = + Jsons.createArrayNode().apply { addAll(data.sortedBy { it.toString() }) } + + val actualRecords: List = actualRead?.records() ?: listOf() + + val records = actualRecords.mapNotNull { it.data } + + records.forEach { jsonNode -> + if (jsonNode is ObjectNode) { + // Remove unwanted fields + jsonNode.remove("_ab_cdc_updated_at") + jsonNode.remove("_ab_cdc_deleted_at") + jsonNode.remove("_ab_cdc_cursor") + jsonNode.remove("_ab_cdc_log_file") + jsonNode.remove("_ab_cdc_log_pos") + } + } + val actual: JsonNode = sortedRecordData(records) + + log.info { "test case $streamName: emitted records $actual" } + val expected: JsonNode = sortedRecordData(allStreamNamesAndRecordData[streamName]!!) + + Assertions.assertEquals(expected, actual) + } + + companion object { + lateinit var dbContainer: MSSQLServerContainer<*> + + fun config(): MsSqlServerSourceConfigurationSpecification = + MsSqlServerContainerFactory.cdcConfig(dbContainer) + + val connectionFactory: JdbcConnectionFactory by lazy { + JdbcConnectionFactory(MsSqlServerSourceConfigurationFactory().make(config())) + } + + val bitValues = + mapOf( + "b'1'" to "true", + "b'0'" to "false", + ) + + val longBitValues = + mapOf( + "b'10101010'" to """"qg=="""", + ) + + val stringValues = + mapOf( + "'abcdef'" to """"abcdef"""", + "'ABCD'" to """"ABCD"""", + "'OXBEEF'" to """"OXBEEF"""", + ) + + val yearValues = + mapOf( + "1992" to """1992""", + "2002" to """2002""", + "70" to """1970""", + ) + + val precisionTwoDecimalValues = + mapOf( + "0.2" to """0.2""", + ) + + val floatValues = + mapOf( + "123.4567" to """123.4567""", + ) + + val zeroPrecisionDecimalValues = + mapOf( + "2" to """2.0""", + ) + + val tinyintValues = + mapOf( + "10" to "10", + "4" to "4", + "2" to "2", + ) + + val intValues = + mapOf( + "10" to "10", + "100000000" to "100000000", + "200000000" to "200000000", + ) + + val dateValues = + 
mapOf( + "'2022-01-01'" to """"2022-01-01"""", + ) + + val timeValues = + mapOf( + "'14:30:00'" to """"14:30:00.000000"""", + ) + + val dateTimeValues = + mapOf( + "'2024-09-13 14:30:00'" to """"2024-09-13T14:30:00.000000"""", + "'2024-09-13T14:40:00+00:00'" to """"2024-09-13T14:40:00.000000"""", + ) + + val timestampValues = + mapOf( + "'2024-09-12 14:30:00'" to """"2024-09-12T14:30:00.000000Z"""", + "CONVERT_TZ('2024-09-12 14:30:00', 'America/Los_Angeles', 'UTC')" to + """"2024-09-12T21:30:00.000000Z"""", + ) + + val booleanValues = + mapOf( + "TRUE" to "true", + "FALSE" to "false", + ) + + val testCases: List = + listOf( + TestCase( + "BOOLEAN", + booleanValues, + airbyteSchemaType = LeafAirbyteSchemaType.BOOLEAN, + cursor = false, + ), + TestCase( + "VARCHAR(10)", + stringValues, + airbyteSchemaType = LeafAirbyteSchemaType.STRING, + ), + TestCase( + "DECIMAL(10,2)", + precisionTwoDecimalValues, + airbyteSchemaType = LeafAirbyteSchemaType.NUMBER, + ), + TestCase( + "DECIMAL(10,2) UNSIGNED", + precisionTwoDecimalValues, + airbyteSchemaType = LeafAirbyteSchemaType.NUMBER, + ), + TestCase( + "DECIMAL UNSIGNED", + zeroPrecisionDecimalValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER, + ), + TestCase( + "FLOAT", + precisionTwoDecimalValues, + airbyteSchemaType = LeafAirbyteSchemaType.NUMBER + ), + TestCase( + "FLOAT(7,4)", + floatValues, + airbyteSchemaType = LeafAirbyteSchemaType.NUMBER, + ), + TestCase( + "FLOAT(53,8)", + floatValues, + airbyteSchemaType = LeafAirbyteSchemaType.NUMBER, + ), + TestCase( + "DOUBLE", + precisionTwoDecimalValues, + airbyteSchemaType = LeafAirbyteSchemaType.NUMBER + ), + TestCase( + "DOUBLE UNSIGNED", + precisionTwoDecimalValues, + airbyteSchemaType = LeafAirbyteSchemaType.NUMBER, + ), + TestCase( + "TINYINT", + tinyintValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER, + ), + TestCase( + "TINYINT UNSIGNED", + tinyintValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER, + ), + TestCase( + "SMALLINT", + 
tinyintValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER, + ), + TestCase( + "MEDIUMINT", + tinyintValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER, + ), + TestCase("BIGINT", intValues, airbyteSchemaType = LeafAirbyteSchemaType.INTEGER), + TestCase( + "SMALLINT UNSIGNED", + tinyintValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER, + ), + TestCase( + "MEDIUMINT UNSIGNED", + tinyintValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER, + ), + TestCase( + "BIGINT UNSIGNED", + intValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER, + ), + TestCase("INT", intValues, airbyteSchemaType = LeafAirbyteSchemaType.INTEGER), + TestCase( + "INT UNSIGNED", + intValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER, + ), + TestCase("DATE", dateValues, airbyteSchemaType = LeafAirbyteSchemaType.DATE), + TestCase( + "TIMESTAMP", + timestampValues, + airbyteSchemaType = LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE, + ), + TestCase( + "DATETIME", + dateTimeValues, + airbyteSchemaType = LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE, + ), + TestCase( + "TIME", + timeValues, + airbyteSchemaType = LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE, + ), + TestCase("YEAR", yearValues, airbyteSchemaType = LeafAirbyteSchemaType.INTEGER), + TestCase( + "BIT", + bitValues, + airbyteSchemaType = LeafAirbyteSchemaType.BOOLEAN, + cursor = false, + ), + TestCase( + "BIT(8)", + longBitValues, + airbyteSchemaType = LeafAirbyteSchemaType.INTEGER, + ), + ) + + val allStreamNamesAndRecordData: Map> = + testCases.flatMap { it.streamNamesToRecordData.toList() }.toMap() + + @JvmStatic + @BeforeAll + @Timeout(value = 300) + fun startAndProvisionTestContainer() { + dbContainer = + MsSqlServerContainerFactory.exclusive( + "mysql:8.0", + MsSqlServerContainerFactory.WithNetwork, + ) + + val gtidOn = + "SET @@GLOBAL.ENFORCE_GTID_CONSISTENCY = 'ON';" + + "SET @@GLOBAL.GTID_MODE = 'OFF_PERMISSIVE';" + + "SET @@GLOBAL.GTID_MODE = 'ON_PERMISSIVE';" + + "SET 
@@GLOBAL.GTID_MODE = 'ON';" + val grant = + "GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT " + + "ON *.* TO '${dbContainer.username}'@'%';" + + dbContainer.execAsRoot(gtidOn) + dbContainer.execAsRoot(grant) + dbContainer.execAsRoot("FLUSH PRIVILEGES;") + connectionFactory + .get() + .also { it.isReadOnly = false } + .use { connection: Connection -> + for (case in testCases) { + for (sql in case.sqlStatements) { + log.info { "test case ${case.id}: executing $sql" } + connection.createStatement().use { stmt -> stmt.execute(sql) } + } + } + } + } + } + + data class TestCase( + val sqlType: String, + val sqlToAirbyte: Map, + val airbyteSchemaType: AirbyteSchemaType = LeafAirbyteSchemaType.STRING, + val cursor: Boolean = true, + val customDDL: List? = null, + ) { + val id: String + get() = + sqlType + .replace("[^a-zA-Z0-9]".toRegex(), " ") + .trim() + .replace(" +".toRegex(), "_") + .lowercase() + + val tableName: String + get() = "tbl_$id" + + val columnName: String + get() = "col_$id" + + val sqlStatements: List + get() { + return listOf( + "CREATE DATABASE IF NOT EXISTS test", + "USE test", + "CREATE TABLE IF NOT EXISTS $tableName " + "($columnName $sqlType PRIMARY KEY)", + "TRUNCATE TABLE $tableName", + ) + } + + val sqlInsertStatements: List + get() { + val result = + listOf("USE test;") + + sqlToAirbyte.keys.map { + "INSERT INTO $tableName ($columnName) VALUES ($it)" + } + return result + } + + val streamNamesToRecordData: Map> + get() { + val recordData: List = + sqlToAirbyte.values.map { Jsons.readTree("""{"${columnName}":$it}""") } + return mapOf(tableName to recordData) + } + } + + */ +} diff --git a/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlCdcIntegrationTest.kt b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlCdcIntegrationTest.kt new file mode 100644 index 000000000000..1c27b4ef8a18 --- /dev/null +++ 
b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlCdcIntegrationTest.kt @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2024 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql + +class MysqlCdcIntegrationTest { + /* + @Test + fun testCheck() { + val run1: BufferingOutputConsumer = CliRunner.source("check", config(), null).run() + + assertEquals(run1.messages().size, 1) + assertEquals( + run1.messages().first().connectionStatus.status, + AirbyteConnectionStatus.Status.SUCCEEDED + ) + + MsSqlServerContainerFactory.exclusive( + imageName = "mysql:8.0", + MsSqlServerContainerFactory.WithCdcOff, + ) + .use { nonCdcDbContainer -> + { + val invalidConfig: MsSqlServerSourceConfigurationSpecification = + MsSqlServerContainerFactory.config(nonCdcDbContainer).apply { + setMethodValue(CdcCursor()) + } + + val nonCdcConnectionFactory = + JdbcConnectionFactory(MsSqlServerSourceConfigurationFactory().make(invalidConfig)) + + provisionTestContainer(nonCdcDbContainer, nonCdcConnectionFactory) + + val run2: BufferingOutputConsumer = + CliRunner.source("check", invalidConfig, null).run() + + val messageInRun2 = + run2 + .messages() + .filter { it.type == AirbyteMessage.Type.CONNECTION_STATUS } + .first() + + assertEquals( + AirbyteConnectionStatus.Status.FAILED, + messageInRun2.connectionStatus.status + ) + } + } + } + + @Test + fun test() { + CliRunner.source("read", config(), configuredCatalog).run() + // TODO: add assertions on run1 messages. 
+ + connectionFactory.get().use { connection: Connection -> + connection.isReadOnly = false + connection.createStatement().use { stmt: Statement -> + stmt.execute("INSERT INTO test.tbl (k, v) VALUES (3, 'baz')") + } + } + } + + @Test + fun testFullRefresh() { + val fullRefreshCatalog = + configuredCatalog.apply { streams.forEach { it.syncMode = SyncMode.FULL_REFRESH } } + CliRunner.source("read", config(), fullRefreshCatalog).run() + connectionFactory.get().use { connection: Connection -> + connection.isReadOnly = false + connection.createStatement().use { stmt: Statement -> + stmt.execute("INSERT INTO test.tbl (k, v) VALUES (4, 'baz')") + } + } + } + + companion object { + val log = KotlinLogging.logger {} + lateinit var dbContainer: MSSQLServerContainer<*> + + fun config(): MsSqlServerSourceConfigurationSpecification = + MsSqlServerContainerFactory.config(dbContainer).apply { setMethodValue(CdcCursor()) } + + val connectionFactory: JdbcConnectionFactory by lazy { + JdbcConnectionFactory(MsSqlServerSourceConfigurationFactory().make(config())) + } + + val configuredCatalog: ConfiguredAirbyteCatalog = run { + val desc = StreamDescriptor().withName("tbl").withNamespace("test") + val discoveredStream = + DiscoveredStream( + id = StreamIdentifier.Companion.from(desc), + columns = listOf(Field("k", IntFieldType), Field("v", StringFieldType)), + primaryKeyColumnIDs = listOf(listOf("k")), + ) + val stream: AirbyteStream = MsSqlServerStreamFactory().createGlobal(discoveredStream) + val configuredStream: ConfiguredAirbyteStream = + CatalogHelpers.toDefaultConfiguredStream(stream) + .withSyncMode(SyncMode.INCREMENTAL) + .withPrimaryKey(discoveredStream.primaryKeyColumnIDs) + .withCursorField(listOf(MysqlCdcMetaFields.CDC_CURSOR.id)) + ConfiguredAirbyteCatalog().withStreams(listOf(configuredStream)) + } + + @JvmStatic + @BeforeAll + @Timeout(value = 300) + fun startAndProvisionTestContainer() { + dbContainer = + MsSqlServerContainerFactory.exclusive( + imageName = 
"mysql:8.0", + MsSqlServerContainerFactory.WithNetwork, + ) + provisionTestContainer(dbContainer, connectionFactory) + } + + fun provisionTestContainer( + targetContainer: MSSQLServerContainer<*>, + targetConnectionFactory: JdbcConnectionFactory + ) { + val gtidOn = + "SET @@GLOBAL.ENFORCE_GTID_CONSISTENCY = 'ON';" + + "SET @@GLOBAL.GTID_MODE = 'OFF_PERMISSIVE';" + + "SET @@GLOBAL.GTID_MODE = 'ON_PERMISSIVE';" + + "SET @@GLOBAL.GTID_MODE = 'ON';" + val grant = + "GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT " + + "ON *.* TO '${targetContainer.username}'@'%';" + targetContainer.execAsRoot(gtidOn) + targetContainer.execAsRoot(grant) + targetContainer.execAsRoot("FLUSH PRIVILEGES;") + + targetConnectionFactory.get().use { connection: Connection -> + connection.isReadOnly = false + connection.createStatement().use { stmt: Statement -> + stmt.execute("CREATE TABLE test.tbl(k INT PRIMARY KEY, v VARCHAR(80))") + } + connection.createStatement().use { stmt: Statement -> + stmt.execute("INSERT INTO test.tbl (k, v) VALUES (1, 'foo'), (2, 'bar')") + } + } + } + } + + */ +} diff --git a/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlJdbcPartitionFactoryTest.kt b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlJdbcPartitionFactoryTest.kt new file mode 100644 index 000000000000..948a20694bc8 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlJdbcPartitionFactoryTest.kt @@ -0,0 +1,293 @@ +/* + * Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mssql + +import io.airbyte.cdk.read.* + +class MysqlJdbcPartitionFactoryTest { + /* + companion object { + private val selectQueryGenerator = MsSqlServerSelectQueryGenerator() + private val sharedState = sharedState() + private val cdcSharedState = sharedState(global = true) + private val config = mockk() + + val mysqlJdbcPartitionFactory = + DefaultJdbcPartitionFactory(sharedState, selectQueryGenerator, config) + val mysqlCdcJdbcPartitionFactory = + DefaultJdbcPartitionFactory(cdcSharedState, selectQueryGenerator, config) + + val fieldId = Field("id", IntFieldType) + val stream = + Stream( + id = + StreamIdentifier.from( + StreamDescriptor().withNamespace("test").withName("stream1") + ), + schema = setOf(fieldId), + configuredSyncMode = ConfiguredSyncMode.INCREMENTAL, + configuredPrimaryKey = listOf(fieldId), + configuredCursor = fieldId, + ) + val timestampFieldId = Field("id2", OffsetDateTimeFieldType) + + val timestampStream = + Stream( + id = + StreamIdentifier.from( + StreamDescriptor().withNamespace("test").withName("stream2") + ), + schema = setOf(timestampFieldId), + configuredSyncMode = ConfiguredSyncMode.INCREMENTAL, + configuredPrimaryKey = listOf(timestampFieldId), + configuredCursor = timestampFieldId, + ) + + val binaryFieldId = Field("id3", BinaryStreamFieldType) + + val binaryStream = + Stream( + id = + StreamIdentifier.from( + StreamDescriptor().withNamespace("test").withName("stream3") + ), + schema = setOf(binaryFieldId), + configuredSyncMode = ConfiguredSyncMode.INCREMENTAL, + configuredPrimaryKey = listOf(binaryFieldId), + configuredCursor = binaryFieldId, + ) + + private fun sharedState( + global: Boolean = false, + ): DefaultJdbcSharedState { + + val configSpec = + MsSqlServerSourceConfigurationSpecification().apply { + host = "" + port = 0 + username = "foo" + password = "bar" + database = "localhost" + } + if (global) { + configSpec.setMethodValue(CdcCursor()) + } else { + 
configSpec.setMethodValue(UserDefinedCursor) + } + val configFactory = MsSqlServerSourceConfigurationFactory() + val configuration = configFactory.make(configSpec) + + val mockSelectQuerier = mockk() + + return DefaultJdbcSharedState( + configuration, + mockSelectQuerier, + DefaultJdbcConstants(), + ConcurrencyResource(configuration), + NoOpGlobalLockResource() + ) + } + + private fun streamFeedBootstrap( + stream: Stream, + incumbentStateValue: OpaqueStateValue? = null + ) = + StreamFeedBootstrap( + outputConsumer = BufferingOutputConsumer(ClockFactory().fixed()), + metaFieldDecorator = + object : MetaFieldDecorator { + override val globalCursor: MetaField? = null + override val globalMetaFields: Set = emptySet() + + override fun decorateRecordData( + timestamp: OffsetDateTime, + globalStateValue: OpaqueStateValue?, + stream: Stream, + recordData: ObjectNode + ) {} + }, + stateQuerier = + object : StateQuerier { + override val feeds: List = listOf(stream) + override fun current(feed: Feed): OpaqueStateValue? 
= + if (feed == stream) incumbentStateValue else null + }, + stream, + ) + } + + @Test + fun testColdStartWithPkCursorBased() { + val jdbcPartition = mysqlJdbcPartitionFactory.create(streamFeedBootstrap(stream)) + assertTrue(jdbcPartition is MysqlJdbcSnapshotWithCursorPartition) + } + + @Test + fun testColdStartWithPkCdc() { + val jdbcPartition = mysqlCdcJdbcPartitionFactory.create(streamFeedBootstrap(stream)) + assertTrue(jdbcPartition is MysqlJdbcCdcSnapshotPartition) + } + + @Test + fun testColdStartWithoutPk() { + val streamWithoutPk = + Stream( + id = + StreamIdentifier.from( + StreamDescriptor().withNamespace("test").withName("stream7") + ), + schema = setOf(fieldId), + configuredSyncMode = ConfiguredSyncMode.INCREMENTAL, + configuredPrimaryKey = listOf(), + configuredCursor = fieldId, + ) + val jdbcPartition = mysqlJdbcPartitionFactory.create(streamFeedBootstrap(streamWithoutPk)) + assertTrue(jdbcPartition is MysqlJdbcNonResumableSnapshotWithCursorPartition) + } + + @Test + fun testResumeFromCompletedCursorBasedRead() { + val incomingStateValue: OpaqueStateValue = + Jsons.readTree( + """ + { + "cursor": "2", + "version": 2, + "state_type": "cursor_based", + "stream_name": "stream1", + "cursor_field": [ + "id" + ], + "stream_namespace": "test", + "cursor_record_count": 1 + } + """.trimIndent() + ) + + val jdbcPartition = + mysqlJdbcPartitionFactory.create(streamFeedBootstrap(stream, incomingStateValue)) + assertTrue(jdbcPartition is MysqlJdbcCursorIncrementalPartition) + } + + @Test + fun testResumeFromCompletedCursorBasedReadTimestamp() { + val incomingStateValue: OpaqueStateValue = + Jsons.readTree( + """ + { + "cursor": "2025-09-03T05:23:35", + "version": 2, + "state_type": "cursor_based", + "stream_name": "stream2", + "cursor_field": [ + "id2" + ], + "stream_namespace": "test", + "cursor_record_count": 1 + } + """.trimIndent() + ) + + val jdbcPartition = + mysqlJdbcPartitionFactory.create( + streamFeedBootstrap(timestampStream, incomingStateValue) + ) + 
assertTrue(jdbcPartition is MysqlJdbcCursorIncrementalPartition) + + assertEquals( + Jsons.valueToTree("2025-09-02T05:23:35.000000Z"), + (jdbcPartition as MysqlJdbcCursorIncrementalPartition).cursorLowerBound + ) + } + + @Test + fun testResumeFromCursorBasedReadInitialRead() { + val incomingStateValue: OpaqueStateValue = + Jsons.readTree( + """ + { + "pk_val": "9063170", + "pk_name": "id", + "version": 2, + "state_type": "primary_key", + "incremental_state": {} + } + """.trimIndent() + ) + + val jdbcPartition = + mysqlJdbcPartitionFactory.create(streamFeedBootstrap(stream, incomingStateValue)) + + assertTrue(jdbcPartition is MysqlJdbcSnapshotWithCursorPartition) + } + + @Test + fun testResumeFromCdcInitialRead() { + val incomingStateValue: OpaqueStateValue = + Jsons.readTree( + """ + { + "pk_val": "29999", + "pk_name": "id", + "version": 2, + "state_type": "primary_key", + "incremental_state": {} + } + """.trimIndent() + ) + + val jdbcPartition = + mysqlCdcJdbcPartitionFactory.create(streamFeedBootstrap(stream, incomingStateValue)) + assertTrue(jdbcPartition is MysqlJdbcCdcSnapshotPartition) + } + + @Test + fun testResumeFromCdcInitialReadComplete() { + val incomingStateValue: OpaqueStateValue = + Jsons.readTree( + """ + { + "stream_name": "stream1", + "cursor_field": [], + "stream_namespace": "test" + } + """.trimIndent() + ) + + val jdbcPartition = + mysqlCdcJdbcPartitionFactory.create(streamFeedBootstrap(stream, incomingStateValue)) + assertNull(jdbcPartition) + } + + @Test + fun testResumeFromCompletedCursorBasedReadBinary() { + val incomingStateValue: OpaqueStateValue = + Jsons.readTree( + """ + { + "cursor": "OQAAAAAAAAAAAAAAAAAAAA==", + "version": 2, + "state_type": "cursor_based", + "stream_name": "stream3", + "cursor_field": [ + "id3" + ], + "stream_namespace": "test", + "cursor_record_count": 1 + } + """.trimIndent() + ) + + val jdbcPartition = + mysqlJdbcPartitionFactory.create(streamFeedBootstrap(binaryStream, incomingStateValue)) + 
assertTrue(jdbcPartition is MysqlJdbcCursorIncrementalPartition) + + assertEquals( + Jsons.valueToTree(Base64.getDecoder().decode("OQAAAAAAAAAAAAAAAAAAAA==")), + (jdbcPartition as MysqlJdbcCursorIncrementalPartition).cursorLowerBound + ) + } + */ +} diff --git a/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlSourceTestConfigurationFactory.kt b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlSourceTestConfigurationFactory.kt new file mode 100644 index 000000000000..7802af256cb6 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/test/kotlin/io/airbyte/integrations/source/mssql/MysqlSourceTestConfigurationFactory.kt @@ -0,0 +1,28 @@ +/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */ +package io.airbyte.integrations.source.mssql + +import io.airbyte.cdk.command.FeatureFlag +import io.airbyte.cdk.command.SourceConfigurationFactory +import io.airbyte.integrations.source.mssql.config_spec.MsSqlServerSourceConfigurationSpecification +import io.micronaut.context.annotation.Primary +import io.micronaut.context.annotation.Requires +import io.micronaut.context.env.Environment +import jakarta.inject.Singleton + +/** Source configuration factory annotated as a `@Primary` `@Singleton` that requires the `TEST` environment. It builds the configuration by delegating to [MsSqlServerSourceConfigurationFactory] with the injected [featureFlags]. The test-specific `.copy(...)` overrides at the end of [makeWithoutExceptionHandling] are commented out, so the delegate's result is currently returned unchanged. NOTE(review): the class name says Mysql although it lives in the mssql package; confirm whether a rename is intended. */ @Singleton +@Requires(env = [Environment.TEST]) +@Primary +class MysqlSourceTestConfigurationFactory(val featureFlags: Set) : + SourceConfigurationFactory< + MsSqlServerSourceConfigurationSpecification, MsSqlServerSourceConfiguration> { + override fun makeWithoutExceptionHandling( + pojo: MsSqlServerSourceConfigurationSpecification, + ): MsSqlServerSourceConfiguration = + MsSqlServerSourceConfigurationFactory(featureFlags).makeWithoutExceptionHandling(pojo) + /*.copy( + maxConcurrency = 1, + checkpointTargetInterval = Duration.ofSeconds(3), + debeziumHeartbeatInterval = Duration.ofMillis(100), + debeziumKeepAliveInterval = Duration.ofSeconds(1), + )*/ +} diff --git
a/airbyte-integrations/connectors/source-mssql/src/test/resources/catalog-cdc-dbo-users.json b/airbyte-integrations/connectors/source-mssql/src/test/resources/catalog-cdc-dbo-users.json new file mode 100644 index 000000000000..d296eaca5bca --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/test/resources/catalog-cdc-dbo-users.json @@ -0,0 +1,144 @@ +{ + "streams": [ + { + "fields": [ + { + "name": "occupation", + "type": "STRING" + }, + { + "name": "gender", + "type": "STRING" + }, + { + "name": "academic_degree", + "type": "STRING" + }, + { + "name": "weight", + "type": "INTEGER" + }, + { + "name": "created_at", + "type": "STRING" + }, + { + "name": "language", + "type": "STRING" + }, + { + "name": "telephone", + "type": "STRING" + }, + { + "name": "title", + "type": "STRING" + }, + { + "name": "updated_at", + "type": "TIMESTAMP_WITHOUT_TIMEZONE" + }, + { + "name": "nationality", + "type": "STRING" + }, + { + "name": "blood_type", + "type": "STRING" + }, + { + "name": "name", + "type": "STRING" + }, + { + "name": "id", + "type": "INTEGER" + }, + { + "name": "age", + "type": "INTEGER" + }, + { + "name": "email", + "type": "STRING" + }, + { + "name": "height", + "type": "NUMBER" + } + ], + "stream": { + "name": "users", + "namespace": "dbo", + "json_schema": { + "type": "object", + "properties": { + "id": { + "type": "number", + "airbyte_type": "integer" + }, + "age": { + "type": "number", + "airbyte_type": "integer" + }, + "name": { + "type": "string" + }, + "email": { + "type": "string" + }, + "title": { + "type": "string" + }, + "gender": { + "type": "string" + }, + "height": { + "type": "number" + }, + "weight": { + "type": "number", + "airbyte_type": "integer" + }, + "language": { + "type": "string" + }, + "telephone": { + "type": "string" + }, + "blood_type": { + "type": "string" + }, + "created_at": { + "type": "string", + "contentEncoding": "base64" + }, + "occupation": { + "type": "string" + }, + "updated_at": { + "type": "string", + 
"format": "date-time", + "airbyte_type": "timestamp_without_timezone" + }, + "nationality": { + "type": "string" + }, + "academic_degree": { + "type": "string" + } + } + }, + "default_cursor_field": [], + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": false, + "source_defined_primary_key": [] + }, + "mappers": [], + "sync_mode": "incremental", + "primary_key": [["id"]], + "cursor_field": ["academic_degree"], + "destination_sync_mode": "append_dedup" + } + ] +} diff --git a/airbyte-integrations/connectors/source-mssql/src/test/resources/catalog-cdc-single-stream.json b/airbyte-integrations/connectors/source-mssql/src/test/resources/catalog-cdc-single-stream.json new file mode 100644 index 000000000000..6f11b3150809 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/test/resources/catalog-cdc-single-stream.json @@ -0,0 +1,50 @@ +{ + "streams": [ + { + "fields": [ + { + "name": "bin", + "type": "STRING" + }, + { + "name": "id", + "type": "STRING" + }, + { + "name": "pmid", + "type": "INTEGER" + } + ], + "stream": { + "name": "NewTable", + "namespace": "dbo", + "json_schema": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "bin": { + "type": "string", + "contentEncoding": "base64" + }, + "pmid": { + "type": "number", + "airbyte_type": "integer" + } + } + }, + "is_resumable": true, + "default_cursor_field": [], + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": false, + "source_defined_primary_key": [["pmid"]] + }, + "mappers": [], + "sync_mode": "incremental", + "primary_key": [["pmid"]], + "cursor_field": ["pmid"], + "destination_sync_mode": "append_dedup" + } + ] +} diff --git a/airbyte-integrations/connectors/source-mssql/src/test/resources/catalog-cdc.json b/airbyte-integrations/connectors/source-mssql/src/test/resources/catalog-cdc.json new file mode 100644 index 000000000000..f20504118758 --- /dev/null +++ 
b/airbyte-integrations/connectors/source-mssql/src/test/resources/catalog-cdc.json @@ -0,0 +1,1174 @@ +{ + "streams": [ + { + "fields": [ + { + "name": "column_id", + "type": "INTEGER" + }, + { + "name": "masking_function", + "type": "STRING" + }, + { + "name": "column_name", + "type": "STRING" + }, + { + "name": "is_computed", + "type": "BOOLEAN" + }, + { + "name": "column_type", + "type": "STRING" + }, + { + "name": "object_id", + "type": "INTEGER" + }, + { + "name": "column_ordinal", + "type": "INTEGER" + } + ], + "stream": { + "name": "captured_columns", + "namespace": "cdc", + "json_schema": { + "type": "object", + "properties": { + "column_id": { + "type": "number", + "airbyte_type": "integer" + }, + "object_id": { + "type": "number", + "airbyte_type": "integer" + }, + "column_name": { + "type": "string" + }, + "column_type": { + "type": "string" + }, + "is_computed": { + "type": "boolean" + }, + "column_ordinal": { + "type": "number", + "airbyte_type": "integer" + }, + "masking_function": { + "type": "string" + } + } + }, + "is_resumable": true, + "default_cursor_field": [], + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": false, + "source_defined_primary_key": [["object_id"], ["column_ordinal"]] + }, + "mappers": [], + "sync_mode": "full_refresh", + "primary_key": [["object_id"], ["column_ordinal"]], + "cursor_field": [], + "destination_sync_mode": "overwrite" + }, + { + "fields": [ + { + "name": "partition_switch", + "type": "BOOLEAN" + }, + { + "name": "has_drop_pending", + "type": "BOOLEAN" + }, + { + "name": "supports_net_changes", + "type": "BOOLEAN" + }, + { + "name": "version", + "type": "INTEGER" + }, + { + "name": "object_id", + "type": "INTEGER" + }, + { + "name": "role_name", + "type": "STRING" + }, + { + "name": "start_lsn", + "type": "STRING" + }, + { + "name": "filegroup_name", + "type": "STRING" + }, + { + "name": "source_object_id", + "type": "INTEGER" + }, + { + "name": "end_lsn", + "type": "STRING" + }, 
+ { + "name": "create_date", + "type": "TIMESTAMP_WITHOUT_TIMEZONE" + }, + { + "name": "index_name", + "type": "STRING" + }, + { + "name": "capture_instance", + "type": "STRING" + } + ], + "stream": { + "name": "change_tables", + "namespace": "cdc", + "json_schema": { + "type": "object", + "properties": { + "end_lsn": { + "type": "string", + "contentEncoding": "base64" + }, + "version": { + "type": "number", + "airbyte_type": "integer" + }, + "object_id": { + "type": "number", + "airbyte_type": "integer" + }, + "role_name": { + "type": "string" + }, + "start_lsn": { + "type": "string", + "contentEncoding": "base64" + }, + "index_name": { + "type": "string" + }, + "create_date": { + "type": "string", + "format": "date-time", + "airbyte_type": "timestamp_without_timezone" + }, + "filegroup_name": { + "type": "string" + }, + "capture_instance": { + "type": "string" + }, + "has_drop_pending": { + "type": "boolean" + }, + "partition_switch": { + "type": "boolean" + }, + "source_object_id": { + "type": "number", + "airbyte_type": "integer" + }, + "supports_net_changes": { + "type": "boolean" + } + } + }, + "is_resumable": true, + "default_cursor_field": [], + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": false, + "source_defined_primary_key": [["object_id"]] + }, + "mappers": [], + "sync_mode": "full_refresh", + "primary_key": [["object_id"]], + "cursor_field": [], + "destination_sync_mode": "overwrite" + }, + { + "fields": [ + { + "name": "bin", + "type": "STRING" + }, + { + "name": "id", + "type": "STRING" + } + ], + "stream": { + "name": "data_type", + "namespace": "cdc", + "json_schema": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "bin": { + "type": "string", + "contentEncoding": "base64" + } + } + }, + "default_cursor_field": [], + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": false, + "source_defined_primary_key": [] + }, + "mappers": [], + "sync_mode": 
"full_refresh", + "primary_key": [], + "cursor_field": [], + "destination_sync_mode": "overwrite" + }, + { + "fields": [ + { + "name": "__$seqval", + "type": "STRING" + }, + { + "name": "__$command_id", + "type": "INTEGER" + }, + { + "name": "bin", + "type": "STRING" + }, + { + "name": "__$end_lsn", + "type": "STRING" + }, + { + "name": "__$update_mask", + "type": "STRING" + }, + { + "name": "__$operation", + "type": "INTEGER" + }, + { + "name": "id", + "type": "STRING" + }, + { + "name": "__$start_lsn", + "type": "STRING" + } + ], + "stream": { + "name": "dbo_NewTable_CT", + "namespace": "cdc", + "json_schema": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "bin": { + "type": "string", + "contentEncoding": "base64" + }, + "__$seqval": { + "type": "string", + "contentEncoding": "base64" + }, + "__$end_lsn": { + "type": "string", + "contentEncoding": "base64" + }, + "__$operation": { + "type": "number", + "airbyte_type": "integer" + }, + "__$start_lsn": { + "type": "string", + "contentEncoding": "base64" + }, + "__$command_id": { + "type": "number", + "airbyte_type": "integer" + }, + "__$update_mask": { + "type": "string", + "contentEncoding": "base64" + } + } + }, + "default_cursor_field": [], + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": false, + "source_defined_primary_key": [] + }, + "mappers": [], + "sync_mode": "full_refresh", + "primary_key": [], + "cursor_field": [], + "destination_sync_mode": "overwrite" + }, + { + "fields": [ + { + "name": "__$seqval", + "type": "STRING" + }, + { + "name": "__$command_id", + "type": "INTEGER" + }, + { + "name": "FirstName", + "type": "STRING" + }, + { + "name": "__$end_lsn", + "type": "STRING" + }, + { + "name": "__$update_mask", + "type": "STRING" + }, + { + "name": "__$operation", + "type": "INTEGER" + }, + { + "name": "ID", + "type": "INTEGER" + }, + { + "name": "LastName", + "type": "STRING" + }, + { + "name": "Age", + "type": "INTEGER" + }, + { + 
"name": "__$start_lsn", + "type": "STRING" + } + ], + "stream": { + "name": "dbo_Persons_CT", + "namespace": "cdc", + "json_schema": { + "type": "object", + "properties": { + "ID": { + "type": "number", + "airbyte_type": "integer" + }, + "Age": { + "type": "number", + "airbyte_type": "integer" + }, + "LastName": { + "type": "string" + }, + "FirstName": { + "type": "string" + }, + "__$seqval": { + "type": "string", + "contentEncoding": "base64" + }, + "__$end_lsn": { + "type": "string", + "contentEncoding": "base64" + }, + "__$operation": { + "type": "number", + "airbyte_type": "integer" + }, + "__$start_lsn": { + "type": "string", + "contentEncoding": "base64" + }, + "__$command_id": { + "type": "number", + "airbyte_type": "integer" + }, + "__$update_mask": { + "type": "string", + "contentEncoding": "base64" + } + } + }, + "default_cursor_field": [], + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": false, + "source_defined_primary_key": [] + }, + "mappers": [], + "sync_mode": "full_refresh", + "primary_key": [], + "cursor_field": [], + "destination_sync_mode": "overwrite" + }, + { + "fields": [ + { + "name": "__$seqval", + "type": "STRING" + }, + { + "name": "__$command_id", + "type": "INTEGER" + }, + { + "name": "__$end_lsn", + "type": "STRING" + }, + { + "name": "__$update_mask", + "type": "STRING" + }, + { + "name": "__$operation", + "type": "INTEGER" + }, + { + "name": "id", + "type": "INTEGER" + }, + { + "name": "test_column", + "type": "STRING" + }, + { + "name": "__$start_lsn", + "type": "STRING" + } + ], + "stream": { + "name": "dbo_dbo_1_datetimeoffset_CT", + "namespace": "cdc", + "json_schema": { + "type": "object", + "properties": { + "id": { + "type": "number", + "airbyte_type": "integer" + }, + "__$seqval": { + "type": "string", + "contentEncoding": "base64" + }, + "__$end_lsn": { + "type": "string", + "contentEncoding": "base64" + }, + "test_column": { + "type": "string" + }, + "__$operation": { + "type": "number", 
+ "airbyte_type": "integer" + }, + "__$start_lsn": { + "type": "string", + "contentEncoding": "base64" + }, + "__$command_id": { + "type": "number", + "airbyte_type": "integer" + }, + "__$update_mask": { + "type": "string", + "contentEncoding": "base64" + } + } + }, + "default_cursor_field": [], + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": false, + "source_defined_primary_key": [] + }, + "mappers": [], + "sync_mode": "full_refresh", + "primary_key": [], + "cursor_field": [], + "destination_sync_mode": "overwrite" + }, + { + "fields": [ + { + "name": "ddl_time", + "type": "TIMESTAMP_WITHOUT_TIMEZONE" + }, + { + "name": "ddl_command", + "type": "STRING" + }, + { + "name": "source_object_id", + "type": "INTEGER" + }, + { + "name": "required_column_update", + "type": "BOOLEAN" + }, + { + "name": "ddl_lsn", + "type": "STRING" + }, + { + "name": "object_id", + "type": "INTEGER" + } + ], + "stream": { + "name": "ddl_history", + "namespace": "cdc", + "json_schema": { + "type": "object", + "properties": { + "ddl_lsn": { + "type": "string", + "contentEncoding": "base64" + }, + "ddl_time": { + "type": "string", + "format": "date-time", + "airbyte_type": "timestamp_without_timezone" + }, + "object_id": { + "type": "number", + "airbyte_type": "integer" + }, + "ddl_command": { + "type": "string" + }, + "source_object_id": { + "type": "number", + "airbyte_type": "integer" + }, + "required_column_update": { + "type": "boolean" + } + } + }, + "is_resumable": true, + "default_cursor_field": [], + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": false, + "source_defined_primary_key": [["object_id"], ["ddl_lsn"]] + }, + "mappers": [], + "sync_mode": "full_refresh", + "primary_key": [["object_id"], ["ddl_lsn"]], + "cursor_field": [], + "destination_sync_mode": "overwrite" + }, + { + "fields": [ + { + "name": "column_id", + "type": "INTEGER" + }, + { + "name": "index_ordinal", + "type": "INTEGER" + }, + { + 
"name": "column_name", + "type": "STRING" + }, + { + "name": "object_id", + "type": "INTEGER" + } + ], + "stream": { + "name": "index_columns", + "namespace": "cdc", + "json_schema": { + "type": "object", + "properties": { + "column_id": { + "type": "number", + "airbyte_type": "integer" + }, + "object_id": { + "type": "number", + "airbyte_type": "integer" + }, + "column_name": { + "type": "string" + }, + "index_ordinal": { + "type": "number", + "airbyte_type": "integer" + } + } + }, + "is_resumable": true, + "default_cursor_field": [], + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": false, + "source_defined_primary_key": [ + ["object_id"], + ["index_ordinal"], + ["column_id"] + ] + }, + "mappers": [], + "sync_mode": "full_refresh", + "primary_key": [["object_id"], ["index_ordinal"], ["column_id"]], + "cursor_field": [], + "destination_sync_mode": "overwrite" + }, + { + "fields": [ + { + "name": "start_lsn", + "type": "STRING" + }, + { + "name": "tran_begin_time", + "type": "TIMESTAMP_WITHOUT_TIMEZONE" + }, + { + "name": "tran_id", + "type": "STRING" + }, + { + "name": "tran_end_time", + "type": "TIMESTAMP_WITHOUT_TIMEZONE" + }, + { + "name": "tran_begin_lsn", + "type": "STRING" + } + ], + "stream": { + "name": "lsn_time_mapping", + "namespace": "cdc", + "json_schema": { + "type": "object", + "properties": { + "tran_id": { + "type": "string", + "contentEncoding": "base64" + }, + "start_lsn": { + "type": "string", + "contentEncoding": "base64" + }, + "tran_end_time": { + "type": "string", + "format": "date-time", + "airbyte_type": "timestamp_without_timezone" + }, + "tran_begin_lsn": { + "type": "string", + "contentEncoding": "base64" + }, + "tran_begin_time": { + "type": "string", + "format": "date-time", + "airbyte_type": "timestamp_without_timezone" + } + } + }, + "is_resumable": true, + "default_cursor_field": [], + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": false, + 
"source_defined_primary_key": [["start_lsn"]] + }, + "mappers": [], + "sync_mode": "full_refresh", + "primary_key": [["start_lsn"]], + "cursor_field": [], + "destination_sync_mode": "overwrite" + }, + { + "fields": [ + { + "name": "bin", + "type": "STRING" + }, + { + "name": "id", + "type": "STRING" + }, + { + "name": "pmid", + "type": "INTEGER" + } + ], + "stream": { + "name": "NewTable", + "namespace": "dbo", + "json_schema": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "bin": { + "type": "string", + "contentEncoding": "base64" + }, + "pmid": { + "type": "number", + "airbyte_type": "integer" + } + } + }, + "is_resumable": true, + "default_cursor_field": [], + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": false, + "source_defined_primary_key": [["pmid"]] + }, + "mappers": [], + "sync_mode": "incremental", + "primary_key": [["pmid"]], + "cursor_field": ["pmid"], + "destination_sync_mode": "append_dedup" + }, + { + "fields": [ + { + "name": "FirstName", + "type": "STRING" + }, + { + "name": "ID", + "type": "INTEGER" + }, + { + "name": "LastName", + "type": "STRING" + }, + { + "name": "Age", + "type": "INTEGER" + } + ], + "stream": { + "name": "Persons", + "namespace": "dbo", + "json_schema": { + "type": "object", + "properties": { + "ID": { + "type": "number", + "airbyte_type": "integer" + }, + "Age": { + "type": "number", + "airbyte_type": "integer" + }, + "LastName": { + "type": "string" + }, + "FirstName": { + "type": "string" + } + } + }, + "is_resumable": true, + "default_cursor_field": [], + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": false, + "source_defined_primary_key": [["ID"]] + }, + "mappers": [], + "sync_mode": "incremental", + "primary_key": [["ID"]], + "cursor_field": ["ID"], + "destination_sync_mode": "append_dedup" + }, + { + "fields": [ + { + "name": "d1", + "type": "TIMESTAMP_WITHOUT_TIMEZONE" + }, + { + "name": "d2", + "type": "STRING" + 
}, + { + "name": "d3", + "type": "STRING" + } + ], + "stream": { + "name": "datetime", + "namespace": "dbo", + "json_schema": { + "type": "object", + "properties": { + "d1": { + "type": "string", + "format": "date-time", + "airbyte_type": "timestamp_without_timezone" + }, + "d2": { + "type": "string" + }, + "d3": { + "type": "string" + } + } + }, + "default_cursor_field": [], + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": false, + "source_defined_primary_key": [] + }, + "mappers": [], + "sync_mode": "full_refresh", + "primary_key": [], + "cursor_field": [], + "destination_sync_mode": "overwrite" + }, + { + "fields": [ + { + "name": "datetime", + "type": "TIMESTAMP_WITHOUT_TIMEZONE" + }, + { + "name": "id", + "type": "INTEGER" + }, + { + "name": "ts_time", + "type": "STRING" + }, + { + "name": "test_column", + "type": "STRING" + } + ], + "stream": { + "name": "dbo_1_datetimeoffset", + "namespace": "dbo", + "json_schema": { + "type": "object", + "properties": { + "id": { + "type": "number", + "airbyte_type": "integer" + }, + "ts_time": { + "type": "string", + "contentEncoding": "base64" + }, + "datetime": { + "type": "string", + "format": "date-time", + "airbyte_type": "timestamp_without_timezone" + }, + "test_column": { + "type": "string" + } + } + }, + "is_resumable": true, + "default_cursor_field": [], + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": false, + "source_defined_primary_key": [["id"]] + }, + "mappers": [], + "sync_mode": "incremental", + "primary_key": [["id"]], + "cursor_field": ["id"], + "destination_sync_mode": "append_dedup" + }, + { + "fields": [ + { + "name": "tabid", + "type": "INTEGER" + }, + { + "name": "startlsn", + "type": "STRING" + }, + { + "name": "endlsn", + "type": "STRING" + }, + { + "name": "typeid", + "type": "INTEGER" + } + ], + "stream": { + "name": "systranschemas", + "namespace": "dbo", + "json_schema": { + "type": "object", + "properties": { + "tabid": 
{ + "type": "number", + "airbyte_type": "integer" + }, + "endlsn": { + "type": "string", + "contentEncoding": "base64" + }, + "typeid": { + "type": "number", + "airbyte_type": "integer" + }, + "startlsn": { + "type": "string", + "contentEncoding": "base64" + } + } + }, + "default_cursor_field": [], + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": false, + "source_defined_primary_key": [] + }, + "mappers": [], + "sync_mode": "full_refresh", + "primary_key": [], + "cursor_field": [], + "destination_sync_mode": "overwrite" + }, + { + "fields": [ + { + "name": "occupation", + "type": "STRING" + }, + { + "name": "gender", + "type": "STRING" + }, + { + "name": "academic_degree", + "type": "STRING" + }, + { + "name": "weight", + "type": "INTEGER" + }, + { + "name": "created_at", + "type": "STRING" + }, + { + "name": "language", + "type": "STRING" + }, + { + "name": "telephone", + "type": "STRING" + }, + { + "name": "title", + "type": "STRING" + }, + { + "name": "updated_at", + "type": "TIMESTAMP_WITHOUT_TIMEZONE" + }, + { + "name": "nationality", + "type": "STRING" + }, + { + "name": "blood_type", + "type": "STRING" + }, + { + "name": "name", + "type": "STRING" + }, + { + "name": "id", + "type": "INTEGER" + }, + { + "name": "age", + "type": "INTEGER" + }, + { + "name": "email", + "type": "STRING" + }, + { + "name": "height", + "type": "NUMBER" + } + ], + "stream": { + "name": "users", + "namespace": "dbo", + "json_schema": { + "type": "object", + "properties": { + "id": { + "type": "number", + "airbyte_type": "integer" + }, + "age": { + "type": "number", + "airbyte_type": "integer" + }, + "name": { + "type": "string" + }, + "email": { + "type": "string" + }, + "title": { + "type": "string" + }, + "gender": { + "type": "string" + }, + "height": { + "type": "number" + }, + "weight": { + "type": "number", + "airbyte_type": "integer" + }, + "language": { + "type": "string" + }, + "telephone": { + "type": "string" + }, + "blood_type": { + 
"type": "string" + }, + "created_at": { + "type": "string", + "contentEncoding": "base64" + }, + "occupation": { + "type": "string" + }, + "updated_at": { + "type": "string", + "format": "date-time", + "airbyte_type": "timestamp_without_timezone" + }, + "nationality": { + "type": "string" + }, + "academic_degree": { + "type": "string" + } + } + }, + "default_cursor_field": [], + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": false, + "source_defined_primary_key": [] + }, + "mappers": [], + "sync_mode": "incremental", + "primary_key": [["id"]], + "cursor_field": ["academic_degree"], + "destination_sync_mode": "append_dedup" + }, + { + "fields": [ + { + "name": "born", + "type": "STRING" + }, + { + "name": "name", + "type": "STRING" + }, + { + "name": "id", + "type": "INTEGER" + } + ], + "stream": { + "name": "id_name_born", + "namespace": "no_cdc", + "json_schema": { + "type": "object", + "properties": { + "id": { + "type": "number", + "airbyte_type": "integer" + }, + "born": { + "type": "string" + }, + "name": { + "type": "string" + } + } + }, + "is_resumable": true, + "default_cursor_field": [], + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": false, + "source_defined_primary_key": [["id"]] + }, + "mappers": [], + "sync_mode": "full_refresh", + "primary_key": [["id"]], + "cursor_field": [], + "destination_sync_mode": "overwrite" + }, + { + "fields": [ + { + "name": "born", + "type": "STRING" + }, + { + "name": "name", + "type": "STRING" + } + ], + "stream": { + "name": "name_born", + "namespace": "no_cdc", + "json_schema": { + "type": "object", + "properties": { + "born": { + "type": "string" + }, + "name": { + "type": "string" + } + } + }, + "default_cursor_field": [], + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": false, + "source_defined_primary_key": [] + }, + "mappers": [], + "sync_mode": "full_refresh", + "primary_key": [], + "cursor_field": [], + 
"destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-mssql/src/test/resources/container-license-acceptance.txt b/airbyte-integrations/connectors/source-mssql/src/test/resources/container-license-acceptance.txt new file mode 100644 index 000000000000..7f099b0aa4e8 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/test/resources/container-license-acceptance.txt @@ -0,0 +1 @@ +mcr.microsoft.com/mssql/server:2022-latest diff --git a/airbyte-integrations/connectors/source-mssql/src/test/resources/expected_spec.json b/airbyte-integrations/connectors/source-mssql/src/test/resources/expected_spec.json new file mode 100644 index 000000000000..029f803edce7 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/test/resources/expected_spec.json @@ -0,0 +1,203 @@ +{ + "documentationUrl": "https://docs.airbyte.com/integrations/sources/mssql", + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "MSSQL Source Spec", + "type": "object", + "required": ["host", "port", "database", "username", "password"], + "properties": { + "host": { + "description": "The hostname of the database.", + "title": "Host", + "type": "string", + "order": 0 + }, + "port": { + "description": "The port of the database.", + "title": "Port", + "type": "integer", + "minimum": 0, + "maximum": 65536, + "examples": ["1433"], + "default": 3306, + "order": 1 + }, + "database": { + "description": "The name of the database.", + "title": "Database", + "type": "string", + "examples": ["master"], + "order": 2 + }, + "schemas": { + "title": "Schemas", + "description": "The list of schemas to sync from. Defaults to user. 
Case sensitive.", + "type": "array", + "items": { + "type": "string" + }, + "minItems": 0, + "uniqueItems": true, + "default": ["dbo"], + "order": 3 + }, + "username": { + "description": "The username which is used to access the database.", + "title": "Username", + "type": "string", + "order": 4 + }, + "password": { + "description": "The password associated with the username.", + "title": "Password", + "type": "string", + "airbyte_secret": true, + "order": 5 + }, + "jdbc_url_params": { + "title": "JDBC URL Params", + "description": "Additional properties to pass to the JDBC URL string when connecting to the database formatted as 'key=value' pairs separated by the symbol '&'. (example: key1=value1&key2=value2&key3=value3).", + "type": "string", + "order": 6 + }, + "ssl_method": { + "title": "SSL Method", + "type": "object", + "description": "The encryption method which is used when communicating with the database.", + "order": 7, + "oneOf": [ + { + "type": "object", + "title": "Unencrypted", + "description": "Data transfer will not be encrypted.", + "required": ["ssl_method"], + "properties": { + "ssl_method": { + "type": "string", + "default": "unencrypted", + "enum": ["unencrypted"] + } + }, + "additionalProperties": true + }, + { + "type": "object", + "title": "Encrypted (trust server certificate)", + "description": "Use the certificate provided by the server without verification. 
(For testing purposes only!)", + "required": ["ssl_method"], + "properties": { + "ssl_method": { + "type": "string", + "default": "encrypted_trust_server_certificate", + "enum": ["encrypted_trust_server_certificate"] + } + }, + "additionalProperties": true + }, + { + "type": "object", + "title": "Encrypted (verify certificate)", + "description": "Verify and use the certificate provided by the server.", + "required": ["ssl_method"], + "properties": { + "ssl_method": { + "type": "string", + "default": "encrypted_verify_certificate", + "enum": ["encrypted_verify_certificate"] + }, + "hostNameInCertificate": { + "title": "Host Name In Certificate", + "type": "string", + "description": "Specifies the host name of the server. The value of this property must match the subject property of the certificate.", + "order": 0 + }, + "certificate": { + "title": "Certificate", + "type": "string", + "description": "certificate of the server, or of the CA that signed the server certificate", + "order": 1, + "airbyte_secret": true, + "multiline": true + } + }, + "additionalProperties": true + } + ] + }, + "replication_method": { + "type": "object", + "title": "Update Method", + "description": "Configures how data is extracted from the database.", + "default": "CDC", + "display_type": "radio", + "order": 8, + "oneOf": [ + { + "type": "object", + "title": "Read Changes using Change Data Capture (CDC)", + "description": "Recommended - Incrementally reads new inserts, updates, and deletes using the SQL Server's change data capture feature. This must be enabled on your database.", + "required": ["method"], + "properties": { + "method": { + "type": "string", + "default": "CDC", + "enum": ["CDC"] + }, + "initial_waiting_seconds": { + "type": "integer", + "title": "Initial Waiting Time in Seconds (Advanced)", + "description": "The amount of time the connector will wait when it launches to determine if there is new data to sync or not. Defaults to 300 seconds. 
Valid range: 120 seconds to 3600 seconds.", + "default": 300, + "min": 120, + "max": 3600, + "order": 1 + }, + "invalid_cdc_cursor_position_behavior": { + "type": "string", + "title": "Invalid CDC position behavior (Advanced)", + "description": "Determines whether Airbyte should fail or re-sync data in case of an stale/invalid cursor value into the WAL. If 'Fail sync' is chosen, a user will have to manually reset the connection before being able to continue syncing data. If 'Re-sync data' is chosen, Airbyte will automatically trigger a refresh but could lead to higher cloud costs and data loss.", + "enum": ["Fail sync", "Re-sync data"], + "default": "Fail sync", + "order": 2 + }, + "queue_size": { + "type": "integer", + "title": "Size of the queue (Advanced)", + "description": "The size of the internal queue. This may interfere with memory consumption and efficiency of the connector, please be careful.", + "default": 10000, + "order": 3, + "min": 1000, + "max": 10000 + }, + "initial_load_timeout_hours": { + "type": "integer", + "title": "Initial Load Timeout in Hours (Advanced)", + "description": "The amount of time an initial load is allowed to continue for before catching up on CDC logs.", + "default": 8, + "min": 4, + "max": 24, + "order": 4 + } + }, + "additionalProperties": true + }, + { + "type": "object", + "title": "Scan Changes with User Defined Cursor", + "description": "Incrementally detects new inserts and updates using the cursor column chosen when configuring a connection (e.g. 
created_at, updated_at).", + "required": ["method"], + "properties": { + "method": { + "type": "string", + "default": "STANDARD", + "enum": ["STANDARD"] + } + }, + "additionalProperties": true + } + ] + } + }, + "additionalProperties": true + } +} diff --git a/airbyte-integrations/connectors/source-mysql/src/main/kotlin/io/airbyte/integrations/source/mysql/MysqlSourceConfigurationSpecification.kt b/airbyte-integrations/connectors/source-mysql/src/main/kotlin/io/airbyte/integrations/source/mysql/MysqlSourceConfigurationSpecification.kt index f2741de20053..0154caac3279 100644 --- a/airbyte-integrations/connectors/source-mysql/src/main/kotlin/io/airbyte/integrations/source/mysql/MysqlSourceConfigurationSpecification.kt +++ b/airbyte-integrations/connectors/source-mysql/src/main/kotlin/io/airbyte/integrations/source/mysql/MysqlSourceConfigurationSpecification.kt @@ -98,7 +98,7 @@ class MysqlSourceConfigurationSpecification : ConfigurationSpecification() { @JsonGetter("ssl_mode") @JsonSchemaTitle("Encryption") @JsonPropertyDescription( - "The encryption method with is used when communicating with the database.", + "The encryption method which is used when communicating with the database.", ) @JsonSchemaInject(json = """{"order":8}""") fun getEncryptionValue(): Encryption? = encryptionJson ?: encryption.asEncryption() @@ -321,7 +321,7 @@ data object UserDefinedCursor : CursorMethodConfiguration @JsonSchemaDescription( "Recommended - " + "Incrementally reads new inserts, updates, and deletes using Mysql's change data capture feature. 
This must be enabled on your database.", ) class CdcCursor : CursorMethodConfiguration { diff --git a/airbyte-integrations/connectors/source-mysql/src/test/resources/expected-spec.json b/airbyte-integrations/connectors/source-mysql/src/test/resources/expected-spec.json index 329b2434bd72..1c0fe1cde5c3 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test/resources/expected-spec.json +++ b/airbyte-integrations/connectors/source-mysql/src/test/resources/expected-spec.json @@ -154,7 +154,7 @@ ], "order": 8, "title": "Encryption", - "description": "The encryption method with is used when communicating with the database." + "description": "The encryption method which is used when communicating with the database." }, "username": { "type": "string", @@ -365,7 +365,7 @@ "description": "Enter the configured MySQL server timezone. This should only be done if the configured timezone in your MySQL instance does not conform to IANNA standard." } }, - "description": "Recommended - Incrementally reads new inserts, updates, and deletes using Mysql's change data capture feature. This must be enabled on your database.", + "description": "Recommended - Incrementally reads new inserts, updates, and deletes using Mysql's change data capture feature. This must be enabled on your database.", "additionalProperties": true } ],