Skip to content

Commit

Permalink
Increase memory allocation for JDBC buffers in source DB connectors (#…
Browse files Browse the repository at this point in the history
…20939)

* Bump total allocation of the source's available memory for the JDBC buffer from 60% to 70%

* Profiling setup

* Update Dockerfile

* Fixes

* Update FetchSizeConstants.java

* Update dockerfiles

* Update JdbcStreamingQueryConfig.java

* auto-bump connector version

* Manually bump source-mysql

Co-authored-by: Octavia Squidington III <octavia-squidington-iii@users.noreply.github.com>
  • Loading branch information
akashkulk and octavia-squidington-iii authored Jan 27, 2023
1 parent 65fc1bc commit 9ccdeb9
Show file tree
Hide file tree
Showing 12 changed files with 20 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1078,7 +1078,7 @@
- name: MySQL
sourceDefinitionId: 435bb9a5-7887-4809-aa58-28c27df0d7ad
dockerRepository: airbyte/source-mysql
dockerImageTag: 1.0.20
dockerImageTag: 1.0.21
documentationUrl: https://docs.airbyte.com/integrations/sources/mysql
icon: mysql.svg
sourceType: database
Expand Down Expand Up @@ -1341,7 +1341,7 @@
- name: Postgres
sourceDefinitionId: decd338e-5647-4c0b-adf4-da0e75f5a750
dockerRepository: airbyte/source-postgres
dockerImageTag: 1.0.40
dockerImageTag: 1.0.41
documentationUrl: https://docs.airbyte.com/integrations/sources/postgres
icon: postgresql.svg
sourceType: database
Expand Down
4 changes: 2 additions & 2 deletions airbyte-config/init/src/main/resources/seed/source_specs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9096,7 +9096,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
- dockerImage: "airbyte/source-mysql:1.0.20"
- dockerImage: "airbyte/source-mysql:1.0.21"
spec:
documentationUrl: "https://docs.airbyte.com/integrations/sources/mysql"
connectionSpecification:
Expand Down Expand Up @@ -11596,7 +11596,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
- dockerImage: "airbyte/source-postgres:1.0.40"
- dockerImage: "airbyte/source-postgres:1.0.41"
spec:
documentationUrl: "https://docs.airbyte.com/integrations/sources/postgres"
connectionSpecification:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,14 @@ public final class FetchSizeConstants {
// limited by the heap size.
public static final double TARGET_BUFFER_SIZE_RATIO = 0.6;
public static final long MIN_BUFFER_BYTE_SIZE = 250L * 1024L * 1024L; // 250 MB
public static final long MAX_BUFFER_BYTE_SIZE = 1024L * 1024L * 1024L; // 1GB
// sample size for making the first estimation of the row size
public static final int INITIAL_SAMPLE_SIZE = 10;
// sample every N rows during the post-initial stage
public static final int SAMPLE_FREQUENCY = 100;

public static final int MIN_FETCH_SIZE = 1;
public static final int DEFAULT_FETCH_SIZE = 1000;
public static final int MAX_FETCH_SIZE = 1_000_000;
public static final int MAX_FETCH_SIZE = 2_000_000;

private FetchSizeConstants() {}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,13 @@
import java.sql.SQLException;
import java.sql.Statement;

/*
* Interface that defines how to stream results from a Jdbc database. This involves determining
* updating what the fetch size should be based on the size of the existing rows. 1. The config
* initializes the fetch size and sets up the estimator. 2. The config then accepts each row and
* feeds it to the estimator. If the estimator has a new estimate, it updates the fetch size.
*/

public interface JdbcStreamingQueryConfig extends CheckedBiConsumer<ResultSet, Object, SQLException> {

void initialize(final Connection connection, final Statement statement) throws SQLException;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,7 @@ static long getTargetBufferByteSize(final Long maxMemory) {
return FetchSizeConstants.MIN_BUFFER_BYTE_SIZE;
}
final long targetBufferByteSize = Math.round(maxMemory * FetchSizeConstants.TARGET_BUFFER_SIZE_RATIO);
final long finalBufferByteSize = Math.min(FetchSizeConstants.MAX_BUFFER_BYTE_SIZE,
Math.max(FetchSizeConstants.MIN_BUFFER_BYTE_SIZE, targetBufferByteSize));
final long finalBufferByteSize = Math.max(FetchSizeConstants.MIN_BUFFER_BYTE_SIZE, targetBufferByteSize);
LOGGER.info("Max memory limit: {}, JDBC buffer size: {}", maxMemory, finalBufferByteSize);
return finalBufferByteSize;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,6 @@ void testGetTargetBufferByteSize() {
TwoStageSizeEstimator.getTargetBufferByteSize(Long.MAX_VALUE));
assertEquals(FetchSizeConstants.MIN_BUFFER_BYTE_SIZE,
TwoStageSizeEstimator.getTargetBufferByteSize(FetchSizeConstants.MIN_BUFFER_BYTE_SIZE - 10L));
assertEquals(FetchSizeConstants.MAX_BUFFER_BYTE_SIZE,
TwoStageSizeEstimator.getTargetBufferByteSize(
Math.round(FetchSizeConstants.MAX_BUFFER_BYTE_SIZE / FetchSizeConstants.TARGET_BUFFER_SIZE_RATIO + 10L)));
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ ENV APPLICATION source-mysql-strict-encrypt

COPY --from=build /airbyte /airbyte

LABEL io.airbyte.version=1.0.20
LABEL io.airbyte.version=1.0.21

LABEL io.airbyte.name=airbyte/source-mysql-strict-encrypt
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-mysql/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ ENV APPLICATION source-mysql

COPY --from=build /airbyte /airbyte

LABEL io.airbyte.version=1.0.20
LABEL io.airbyte.version=1.0.21

LABEL io.airbyte.name=airbyte/source-mysql
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ ENV APPLICATION source-postgres-strict-encrypt

COPY --from=build /airbyte /airbyte

LABEL io.airbyte.version=1.0.40
LABEL io.airbyte.version=1.0.41
LABEL io.airbyte.name=airbyte/source-postgres-strict-encrypt
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-postgres/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ ENV APPLICATION source-postgres

COPY --from=build /airbyte /airbyte

LABEL io.airbyte.version=1.0.40
LABEL io.airbyte.version=1.0.41
LABEL io.airbyte.name=airbyte/source-postgres
1 change: 1 addition & 0 deletions docs/integrations/sources/mysql.md
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ WHERE actor_definition_id ='435bb9a5-7887-4809-aa58-28c27df0d7ad' AND (configura

| Version | Date | Pull Request | Subject |
|:--------|:-----------|:-----------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| 1.0.21 | 2022-01-25 | [20939](https://github.com/airbytehq/airbyte/pull/20939) | Adjust batch selection memory limits databases. |
| 1.0.20 | 2023-01-24 | [20593](https://github.com/airbytehq/airbyte/pull/20593) | Handle ssh time out exception |
| 1.0.19 | 2022-12-14 | [20436](https://github.com/airbytehq/airbyte/pull/20346) | Consolidate date/time values mapping for JDBC sources |
| 1.0.18 | 2022-12-14 | [20378](https://github.com/airbytehq/airbyte/pull/20378) | Improve descriptions |
Expand Down
3 changes: 2 additions & 1 deletion docs/integrations/sources/postgres.md
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,8 @@ The root causes is that the WALs needed for the incremental sync has been remove

| Version | Date | Pull Request | Subject |
|:--------|:-----------|:---------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| 1.0.40 | 2023-01-24 | [21825](https://github.com/airbytehq/airbyte/pull/21825) | Put back the original change that will cause an incremental sync to error if table contains a NULL value in cursor column. |
| 1.0.41 | 2022-01-25 | [20939](https://github.com/airbytehq/airbyte/pull/20939) | Adjust batch selection memory limits databases. |
| 1.0.40 | 2023-01-24 | [21825](https://github.com/airbytehq/airbyte/pull/21825) | Put back the original change that will cause an incremental sync to error if table contains a NULL value in cursor column. |
| 1.0.39 | 2023-01-20 | [21683](https://github.com/airbytehq/airbyte/pull/21683) | Speed up esmtimates for trace messages in non-CDC mode. |
| 1.0.38 | 2023-01-17 | [20436](https://github.com/airbytehq/airbyte/pull/20346) | Consolidate date/time values mapping for JDBC sources |
| 1.0.37 | 2023-01-17 | [20783](https://github.com/airbytehq/airbyte/pull/20783) | Emit estimate trace messages for non-CDC mode. |
Expand Down

0 comments on commit 9ccdeb9

Please sign in to comment.