From b31b7b875c8fb33942e4d5a4b22996d0bbd23099 Mon Sep 17 00:00:00 2001 From: Sergio Ropero Date: Wed, 1 Feb 2023 18:44:12 +0100 Subject: [PATCH 1/4] This changes allows to filter out system views created out of system namespaces --- .../integrations/source/bigquery/BigQuerySource.java | 5 +++++ .../airbyte/integrations/source/jdbc/JdbcSource.java | 5 +++++ .../MongoDbSource.java | 5 +++++ .../integrations/source/postgres/PostgresSource.java | 5 +++++ .../source/relationaldb/AbstractDbSource.java | 10 +++++++++- 5 files changed, 29 insertions(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java index bae1c07ff076..b4965d4343ba 100644 --- a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java +++ b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java @@ -101,6 +101,11 @@ public Set getExcludedInternalNameSpaces() { return Collections.emptySet(); } + @Override + protected Set getExcludedViews() { + return Collections.emptySet(); + } + @Override protected List>> discoverInternal(final BigQueryDatabase database) throws Exception { return discoverInternal(database, null); diff --git a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcSource.java b/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcSource.java index ecc54b6fd25d..61431b900972 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcSource.java +++ b/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcSource.java @@ -34,6 +34,11 @@ public Set getExcludedInternalNameSpaces() { return Set.of("information_schema", "pg_catalog", "pg_internal", "catalog_history"); } + @Override + protected Set getExcludedViews() { + return Set.of("pg_stat_statements"); + } + public static void main(final String[] args) throws Exception { final Source source = new JdbcSource(); LOGGER.info("starting source: {}", JdbcSource.class); diff --git a/airbyte-integrations/connectors/source-mongodb-v2/src/main/java/io.airbyte.integrations.source.mongodb/MongoDbSource.java b/airbyte-integrations/connectors/source-mongodb-v2/src/main/java/io.airbyte.integrations.source.mongodb/MongoDbSource.java index 97095a4bb4bd..9c536e244469 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2/src/main/java/io.airbyte.integrations.source.mongodb/MongoDbSource.java +++ b/airbyte-integrations/connectors/source-mongodb-v2/src/main/java/io.airbyte.integrations.source.mongodb/MongoDbSource.java @@ -96,6 +96,11 @@ public Set getExcludedInternalNameSpaces() { return Collections.emptySet(); } + @Override + protected Set getExcludedViews() { + return Collections.emptySet(); + } + @Override protected List>> discoverInternal(final MongoDatabase database) throws Exception { diff --git a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java index 06df1a710c47..e276548e7da2 100644 --- a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java +++ b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java @@ -202,6 +202,11 @@ public Set getExcludedInternalNameSpaces() { return Set.of("information_schema", "pg_catalog", "pg_internal", "catalog_history"); } + @Override + protected Set getExcludedViews() { + return Set.of("pg_stat_statements", "pg_stat_statements_info"); + } + @Override public AirbyteCatalog discover(final JsonNode config) throws Exception { final AirbyteCatalog catalog = super.discover(config); diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java index dd724538d47f..719a224a7577 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java @@ -271,10 +271,11 @@ private List>> discoverWithoutSystemTables( final Database database) throws Exception { final Set systemNameSpaces = getExcludedInternalNameSpaces(); + final Set systemViews = getExcludedViews(); final List>> discoveredTables = discoverInternal(database); return (systemNameSpaces == null || systemNameSpaces.isEmpty() ? discoveredTables : discoveredTables.stream() - .filter(table -> !systemNameSpaces.contains(table.getNameSpace())).collect( + .filter(table -> !systemNameSpaces.contains(table.getNameSpace()) && !systemViews.contains(table.getName())).collect( Collectors.toList())); } @@ -640,6 +641,13 @@ protected abstract List> getCheckOperations */ protected abstract Set getExcludedInternalNameSpaces(); + /** + * Get list of system tables in order to exclude them from the discover result list. + * + * @return set of tables to be excluded + */ + protected abstract Set getExcludedViews(); + /** * Discover all available tables in the source database. * From 6a9460b9bdbe780244d2cb4ab63ba0f334e31355 Mon Sep 17 00:00:00 2001 From: Sergio Ropero Date: Wed, 1 Feb 2023 18:47:40 +0100 Subject: [PATCH 2/4] Add extra view --- .../java/io/airbyte/integrations/source/jdbc/JdbcSource.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcSource.java b/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcSource.java index 61431b900972..964f7ed04449 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcSource.java +++ b/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcSource.java @@ -36,7 +36,7 @@ public Set getExcludedInternalNameSpaces() { @Override protected Set getExcludedViews() { - return Set.of("pg_stat_statements"); + return Set.of("pg_stat_statements", "pg_stat_statements_info"); } public static void main(final String[] args) throws Exception { From c617d4772d7b12a09ccfe74db5dd9a0d3854b99f Mon Sep 17 00:00:00 2001 From: Sergio Ropero Date: Thu, 2 Feb 2023 15:20:01 +0100 Subject: [PATCH 3/4] Fix issue --- .../integrations/source/postgres/PostgresStressTest.java | 5 +++++ .../integrations/source/relationaldb/AbstractDbSource.java | 6 +++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresStressTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresStressTest.java index bdd5ac2e017c..4c21d7390ec2 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresStressTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresStressTest.java @@ -123,6 +123,11 @@ public Set getExcludedInternalNameSpaces() { return Set.of("information_schema", "pg_catalog", "pg_internal", "catalog_history"); } + @Override + protected Set getExcludedViews() { + return Set.of("pg_stat_statements", "pg_stat_statements_info"); + } + public static void main(final String[] args) throws Exception { final Source source = new PostgresTestSource(); LOGGER.info("starting source: {}", PostgresTestSource.class); diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java index 719a224a7577..06934731423b 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java @@ -635,16 +635,16 @@ protected abstract List> getCheckOperations protected abstract JsonSchemaType getAirbyteType(DataType columnType); /** - * Get list of system namespaces(schemas) in order to exclude them from the discover result list. + * Get list of system namespaces(schemas) in order to exclude them from the `discover` result list. * * @return set of system namespaces(schemas) to be excluded */ protected abstract Set getExcludedInternalNameSpaces(); /** - * Get list of system tables in order to exclude them from the discover result list. + * Get list of system views in order to exclude them from the `discover` result list. * - * @return set of tables to be excluded + * @return set of views to be excluded */ protected abstract Set getExcludedViews(); From 7394465f8a9de11c78a52397735283d44e3d12cd Mon Sep 17 00:00:00 2001 From: Sergio Ropero Date: Mon, 6 Feb 2023 09:29:09 -0800 Subject: [PATCH 4/4] Bump Postgres source version --- .../init/src/main/resources/seed/source_definitions.yaml | 2 +- airbyte-config/init/src/main/resources/seed/source_specs.yaml | 2 +- .../connectors/source-postgres-strict-encrypt/Dockerfile | 2 +- airbyte-integrations/connectors/source-postgres/Dockerfile | 2 +- docs/integrations/sources/postgres.md | 1 + 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 4b87f1e73457..2847e7f55a06 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -1362,7 +1362,7 @@ - name: Postgres sourceDefinitionId: decd338e-5647-4c0b-adf4-da0e75f5a750 dockerRepository: airbyte/source-postgres - dockerImageTag: 1.0.42 + dockerImageTag: 1.0.43 documentationUrl: https://docs.airbyte.com/integrations/sources/postgres icon: postgresql.svg sourceType: database diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 6d93b5ef780f..d7efd47adc3b 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -11606,7 +11606,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-postgres:1.0.42" +- dockerImage: "airbyte/source-postgres:1.0.43" spec: documentationUrl: "https://docs.airbyte.com/integrations/sources/postgres" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-postgres-strict-encrypt/Dockerfile b/airbyte-integrations/connectors/source-postgres-strict-encrypt/Dockerfile index 972cbb614037..71c118d54497 100644 --- a/airbyte-integrations/connectors/source-postgres-strict-encrypt/Dockerfile +++ b/airbyte-integrations/connectors/source-postgres-strict-encrypt/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION source-postgres-strict-encrypt COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=1.0.42 +LABEL io.airbyte.version=1.0.43 LABEL io.airbyte.name=airbyte/source-postgres-strict-encrypt diff --git a/airbyte-integrations/connectors/source-postgres/Dockerfile b/airbyte-integrations/connectors/source-postgres/Dockerfile index f05fd2e03c1f..bae41a4ddc26 100644 --- a/airbyte-integrations/connectors/source-postgres/Dockerfile +++ b/airbyte-integrations/connectors/source-postgres/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION source-postgres COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=1.0.42 +LABEL io.airbyte.version=1.0.43 LABEL io.airbyte.name=airbyte/source-postgres diff --git a/docs/integrations/sources/postgres.md b/docs/integrations/sources/postgres.md index a92717f6f397..1c6bd8a4d11d 100644 --- a/docs/integrations/sources/postgres.md +++ b/docs/integrations/sources/postgres.md @@ -411,6 +411,7 @@ The root causes is that the WALs needed for the incremental sync has been remove | Version | Date | Pull Request | Subject | |:--------|:-----------|:---------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 1.0.43 | 2022-02-06 | [22221](https://github.com/airbytehq/airbyte/pull/22221) | Exclude new set of system tables when using `pg_stat_statements` extension. | | 1.0.42 | 2022-01-23 | [21523](https://github.com/airbytehq/airbyte/pull/21523) | Check for null in cursor values before replacing. | | 1.0.41 | 2022-01-25 | [20939](https://github.com/airbytehq/airbyte/pull/20939) | Adjust batch selection memory limits databases. | | 1.0.40 | 2023-01-24 | [21825](https://github.com/airbytehq/airbyte/pull/21825) | Put back the original change that will cause an incremental sync to error if table contains a NULL value in cursor column. |