Skip to content

Commit

Permalink
feat: Filter out system views out of system namespaces (#22427)
Browse files Browse the repository at this point in the history
* This changes allows to filter out system views created out of system namespaces

* Add extra view

* Fix issue

* Bump Postgres source version

* bump version

* Bump alloydb

* Bump versioning

* auto-bump connector version

* Set default implementation and remove from unneeded places

---------

Co-authored-by: Octavia Squidington III <octavia-squidington-iii@users.noreply.github.com>
  • Loading branch information
sergio-ropero and octavia-squidington-iii authored Feb 9, 2023
1 parent 3a3365a commit 1807c8f
Show file tree
Hide file tree
Showing 11 changed files with 42 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
- name: AlloyDB for PostgreSQL
sourceDefinitionId: 1fa90628-2b9e-11ed-a261-0242ac120002
dockerRepository: airbyte/source-alloydb
dockerImageTag: 1.0.43
dockerImageTag: 1.0.44
documentationUrl: https://docs.airbyte.com/integrations/sources/alloydb
icon: alloydb.svg
sourceType: database
Expand Down Expand Up @@ -1376,7 +1376,7 @@
- name: Postgres
sourceDefinitionId: decd338e-5647-4c0b-adf4-da0e75f5a750
dockerRepository: airbyte/source-postgres
dockerImageTag: 1.0.43
dockerImageTag: 1.0.44
documentationUrl: https://docs.airbyte.com/integrations/sources/postgres
icon: postgresql.svg
sourceType: database
Expand Down
4 changes: 2 additions & 2 deletions airbyte-config/init/src/main/resources/seed/source_specs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
- dockerImage: "airbyte/source-alloydb:1.0.43"
- dockerImage: "airbyte/source-alloydb:1.0.44"
spec:
documentationUrl: "https://docs.airbyte.com/integrations/sources/postgres"
connectionSpecification:
Expand Down Expand Up @@ -11617,7 +11617,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
- dockerImage: "airbyte/source-postgres:1.0.43"
- dockerImage: "airbyte/source-postgres:1.0.44"
spec:
documentationUrl: "https://docs.airbyte.com/integrations/sources/postgres"
connectionSpecification:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ ENV APPLICATION source-alloydb-strict-encrypt

COPY --from=build /airbyte /airbyte

LABEL io.airbyte.version=1.0.43
LABEL io.airbyte.version=1.0.44
LABEL io.airbyte.name=airbyte/source-alloydb-strict-encrypt
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-alloydb/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ ENV APPLICATION source-alloydb

COPY --from=build /airbyte /airbyte

LABEL io.airbyte.version=1.0.43
LABEL io.airbyte.version=1.0.44
LABEL io.airbyte.name=airbyte/source-alloydb
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ ENV APPLICATION source-postgres-strict-encrypt

COPY --from=build /airbyte /airbyte

LABEL io.airbyte.version=1.0.43
LABEL io.airbyte.version=1.0.44
LABEL io.airbyte.name=airbyte/source-postgres-strict-encrypt
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-postgres/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ ENV APPLICATION source-postgres

COPY --from=build /airbyte /airbyte

LABEL io.airbyte.version=1.0.43
LABEL io.airbyte.version=1.0.44
LABEL io.airbyte.name=airbyte/source-postgres
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,11 @@ public Set<String> getExcludedInternalNameSpaces() {
return Set.of("information_schema", "pg_catalog", "pg_internal", "catalog_history");
}

@Override
protected Set<String> getExcludedViews() {
return Set.of("pg_stat_statements", "pg_stat_statements_info");
}

@Override
public AirbyteCatalog discover(final JsonNode config) throws Exception {
final AirbyteCatalog catalog = super.discover(config);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,11 @@ public Set<String> getExcludedInternalNameSpaces() {
return Set.of("information_schema", "pg_catalog", "pg_internal", "catalog_history");
}

@Override
protected Set<String> getExcludedViews() {
return Set.of("pg_stat_statements", "pg_stat_statements_info");
}

public static void main(final String[] args) throws Exception {
final Source source = new PostgresTestSource();
LOGGER.info("starting source: {}", PostgresTestSource.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -304,10 +304,11 @@ private List<TableInfo<CommonField<DataType>>> discoverWithoutSystemTables(
final Database database)
throws Exception {
final Set<String> systemNameSpaces = getExcludedInternalNameSpaces();
final Set<String> systemViews = getExcludedViews();
final List<TableInfo<CommonField<DataType>>> discoveredTables = discoverInternal(database);
return (systemNameSpaces == null || systemNameSpaces.isEmpty() ? discoveredTables
: discoveredTables.stream()
.filter(table -> !systemNameSpaces.contains(table.getNameSpace())).collect(
.filter(table -> !systemNameSpaces.contains(table.getNameSpace()) && !systemViews.contains(table.getName())).collect(
Collectors.toList()));
}

Expand Down Expand Up @@ -667,12 +668,19 @@ protected abstract List<CheckedConsumer<Database, Exception>> getCheckOperations
protected abstract JsonSchemaType getAirbyteType(DataType columnType);

/**
* Get list of system namespaces(schemas) in order to exclude them from the discover result list.
* Get list of system namespaces(schemas) in order to exclude them from the `discover` result list.
*
* @return set of system namespaces(schemas) to be excluded
*/
protected abstract Set<String> getExcludedInternalNameSpaces();

/**
* Get list of system views in order to exclude them from the `discover` result list.
*
* @return set of views to be excluded
*/
protected Set<String> getExcludedViews() { return Collections.emptySet(); };

/**
* Discover all available tables in the source database.
*
Expand Down
25 changes: 13 additions & 12 deletions docs/integrations/sources/alloydb.md
Original file line number Diff line number Diff line change
Expand Up @@ -325,15 +325,16 @@ According to Postgres [documentation](https://www.postgresql.org/docs/14/datatyp

## Changelog

| Version | Date | Pull Request | Subject |
|:--------|:-----------|:---------------------------------------------------------|:-------------------------------------------------|
| 1.0.43 | 2022-02-06 | [21634](https://github.com/airbytehq/airbyte/pull/21634) | Improve Standard sync performance by caching objects.|
| 1.0.36 | 2023-01-24 | [21825](https://github.com/airbytehq/airbyte/pull/21825) | Put back the original change that will cause an incremental sync to error if table contains a NULL value in cursor column.|
| 1.0.35 | 2022-12-14 | [20436](https://github.com/airbytehq/airbyte/pull/20346) | Consolidate date/time values mapping for JDBC sources |
| 1.0.34 | 2022-12-13 | [20378](https://github.com/airbytehq/airbyte/pull/20378) | Improve descriptions |
| 1.0.17 | 2022-10-31 | [18538](https://github.com/airbytehq/airbyte/pull/18538) | Encode database name |
| 1.0.16 | 2022-10-25 | [18256](https://github.com/airbytehq/airbyte/pull/18256) | Disable allow and prefer ssl modes in CDC mode |
| | 2022-10-13 | [15535](https://github.com/airbytehq/airbyte/pull/16238) | Update incremental query to avoid data missing when new data is inserted at the same time as a sync starts under non-CDC incremental mode |
| 1.0.15 | 2022-10-11 | [17782](https://github.com/airbytehq/airbyte/pull/17782) | Align with Postgres source v.1.0.15 |
| 1.0.0 | 2022-09-15 | [16776](https://github.com/airbytehq/airbyte/pull/16776) | Align with strict-encrypt version |
| 0.1.0 | 2022-09-05 | [16323](https://github.com/airbytehq/airbyte/pull/16323) | Initial commit. Based on source-postgres v.1.0.7 |
| Version | Date | Pull Request | Subject |
|:--------|:-----------|:---------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| 1.0.44 | 2022-02-06 | [22221](https://github.com/airbytehq/airbyte/pull/22221) | Exclude new set of system tables when using `pg_stat_statements` extension. |
| 1.0.43 | 2022-02-06 | [21634](https://github.com/airbytehq/airbyte/pull/21634) | Improve Standard sync performance by caching objects. |
| 1.0.36 | 2023-01-24 | [21825](https://github.com/airbytehq/airbyte/pull/21825) | Put back the original change that will cause an incremental sync to error if table contains a NULL value in cursor column. |
| 1.0.35 | 2022-12-14 | [20436](https://github.com/airbytehq/airbyte/pull/20346) | Consolidate date/time values mapping for JDBC sources |
| 1.0.34 | 2022-12-13 | [20378](https://github.com/airbytehq/airbyte/pull/20378) | Improve descriptions |
| 1.0.17 | 2022-10-31 | [18538](https://github.com/airbytehq/airbyte/pull/18538) | Encode database name |
| 1.0.16 | 2022-10-25 | [18256](https://github.com/airbytehq/airbyte/pull/18256) | Disable allow and prefer ssl modes in CDC mode |
| | 2022-10-13 | [15535](https://github.com/airbytehq/airbyte/pull/16238) | Update incremental query to avoid data missing when new data is inserted at the same time as a sync starts under non-CDC incremental mode |
| 1.0.15 | 2022-10-11 | [17782](https://github.com/airbytehq/airbyte/pull/17782) | Align with Postgres source v.1.0.15 |
| 1.0.0 | 2022-09-15 | [16776](https://github.com/airbytehq/airbyte/pull/16776) | Align with strict-encrypt version |
| 0.1.0 | 2022-09-05 | [16323](https://github.com/airbytehq/airbyte/pull/16323) | Initial commit. Based on source-postgres v.1.0.7 |
1 change: 1 addition & 0 deletions docs/integrations/sources/postgres.md
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,7 @@ The root causes is that the WALs needed for the incremental sync has been remove

| Version | Date | Pull Request | Subject |
|:--------|:-----------|:---------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| 1.0.44 | 2022-02-06 | [22221](https://github.com/airbytehq/airbyte/pull/22221) | Exclude new set of system tables when using `pg_stat_statements` extension. |
| 1.0.43 | 2022-02-06 | [21634](https://github.com/airbytehq/airbyte/pull/21634) | Improve Standard sync performance by caching objects. |
| 1.0.42 | 2022-01-23 | [21523](https://github.com/airbytehq/airbyte/pull/21523) | Check for null in cursor values before replacing. |
| 1.0.41 | 2022-01-25 | [20939](https://github.com/airbytehq/airbyte/pull/20939) | Adjust batch selection memory limits databases. |
Expand Down

0 comments on commit 1807c8f

Please sign in to comment.