From 2351c4610620a7533fb88491de31e23035740bf6 Mon Sep 17 00:00:00 2001 From: Brian Matt Date: Wed, 29 Dec 2021 09:08:03 -0500 Subject: [PATCH 01/12] Added buffer size config and implemented in csv/jsonl writer. Adjusted printwriter to disable autoflush as well. --- .../AzureBlobStorageDestinationConfig.java | 13 +++++++++++++ .../csv/AzureBlobStorageCsvWriter.java | 9 +++++---- .../jsonl/AzureBlobStorageJsonlWriter.java | 7 ++++--- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConfig.java b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConfig.java index fb2777db8a5e..9740c7d5ef32 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConfig.java +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConfig.java @@ -15,6 +15,7 @@ public class AzureBlobStorageDestinationConfig { private final String accountName; private final String accountKey; private final String containerName; + private final int outputStreamBufferSize; private final AzureBlobStorageFormatConfig formatConfig; public AzureBlobStorageDestinationConfig( @@ -22,11 +23,13 @@ public AzureBlobStorageDestinationConfig( final String accountName, final String accountKey, final String containerName, + final int outputStreamBufferSize, final AzureBlobStorageFormatConfig formatConfig) { this.endpointUrl = endpointUrl; this.accountName = accountName; this.accountKey = accountKey; this.containerName = containerName; + this.outputStreamBufferSize = outputStreamBufferSize; this.formatConfig = formatConfig; } 
@@ -50,12 +53,21 @@ public AzureBlobStorageFormatConfig getFormatConfig() { return formatConfig; } + public int getOutputStreamBufferSize() { + return outputStreamBufferSize; + } + public static AzureBlobStorageDestinationConfig getAzureBlobStorageConfig(final JsonNode config) { final String accountNameFomConfig = config.get("azure_blob_storage_account_name").asText(); final String accountKeyFromConfig = config.get("azure_blob_storage_account_key").asText(); final JsonNode endpointFromConfig = config .get("azure_blob_storage_endpoint_domain_name"); final JsonNode containerName = config.get("azure_blob_storage_container_name"); + final int outputStreamBufferSizeFromConfig = config.get("azure_blob_storage_output_buffer_size").asInt(); + if (outputStreamBufferSizeFromConfig == null) { + // Default to 100MB buffer size + outputStreamBufferSizeFromConfig = 1024 * 1024 * 100; + } final JsonNode blobName = config.get("azure_blob_storage_blob_name"); // streamId final String endpointComputed = String.format(Locale.ROOT, DEFAULT_STORAGE_ENDPOINT_FORMAT, @@ -72,6 +84,7 @@ public static AzureBlobStorageDestinationConfig getAzureBlobStorageConfig(final accountNameFomConfig, accountKeyFromConfig, containerNameComputed, + outputStreamBufferSizeFromConfig, AzureBlobStorageFormatConfigs.getAzureBlobStorageFormatConfig(config)); } diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/csv/AzureBlobStorageCsvWriter.java b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/csv/AzureBlobStorageCsvWriter.java index 420202eac6d1..23e31bbf4d9c 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/csv/AzureBlobStorageCsvWriter.java +++ 
b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/csv/AzureBlobStorageCsvWriter.java @@ -11,6 +11,7 @@ import io.airbyte.integrations.destination.azure_blob_storage.writer.BaseAzureBlobStorageWriter; import io.airbyte.protocol.models.AirbyteRecordMessage; import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import java.io.BufferedOutputStream; import java.io.IOException; import java.io.PrintWriter; import java.nio.charset.StandardCharsets; @@ -28,7 +29,7 @@ public class AzureBlobStorageCsvWriter extends BaseAzureBlobStorageWriter implem private final CsvSheetGenerator csvSheetGenerator; private final CSVPrinter csvPrinter; - private final BlobOutputStream blobOutputStream; + private final BufferedOutputStream blobOutputStream; public AzureBlobStorageCsvWriter(final AzureBlobStorageDestinationConfig config, final AppendBlobClient appendBlobClient, @@ -44,17 +45,17 @@ public AzureBlobStorageCsvWriter(final AzureBlobStorageDestinationConfig config, .create(configuredStream.getStream().getJsonSchema(), formatConfig); - this.blobOutputStream = appendBlobClient.getBlobOutputStream(); + this.blobOutputStream = new BufferedOutputStream(appendBlobClient.getBlobOutputStream(), config.getOutputStreamBufferSize()); if (isNewlyCreatedBlob) { this.csvPrinter = new CSVPrinter( - new PrintWriter(blobOutputStream, true, StandardCharsets.UTF_8), + new PrintWriter(blobOutputStream, false, StandardCharsets.UTF_8), CSVFormat.DEFAULT.withQuoteMode(QuoteMode.ALL) .withHeader(csvSheetGenerator.getHeaderRow().toArray(new String[0]))); } else { // no header required for append this.csvPrinter = new CSVPrinter( - new PrintWriter(blobOutputStream, true, StandardCharsets.UTF_8), + new PrintWriter(blobOutputStream, false, StandardCharsets.UTF_8), CSVFormat.DEFAULT.withQuoteMode(QuoteMode.ALL)); } } diff --git 
a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/jsonl/AzureBlobStorageJsonlWriter.java b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/jsonl/AzureBlobStorageJsonlWriter.java index aed0849ede5e..c198e8edbd36 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/jsonl/AzureBlobStorageJsonlWriter.java +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/jsonl/AzureBlobStorageJsonlWriter.java @@ -17,6 +17,7 @@ import io.airbyte.integrations.destination.azure_blob_storage.writer.BaseAzureBlobStorageWriter; import io.airbyte.protocol.models.AirbyteRecordMessage; import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import java.io.BufferedOutputStream; import java.io.IOException; import java.io.PrintWriter; import java.nio.charset.StandardCharsets; @@ -32,7 +33,7 @@ public class AzureBlobStorageJsonlWriter extends BaseAzureBlobStorageWriter impl private static final ObjectMapper MAPPER = MoreMappers.initMapper(); private static final ObjectWriter WRITER = MAPPER.writer(); - private final BlobOutputStream blobOutputStream; + private final BufferedOutputStream blobOutputStream; private final PrintWriter printWriter; public AzureBlobStorageJsonlWriter(final AzureBlobStorageDestinationConfig config, @@ -41,8 +42,8 @@ public AzureBlobStorageJsonlWriter(final AzureBlobStorageDestinationConfig confi final boolean isNewlyCreatedBlob) { super(config, appendBlobClient, configuredStream); // at this moment we already receive appendBlobClient initialized - this.blobOutputStream = appendBlobClient.getBlobOutputStream(); - this.printWriter = new PrintWriter(blobOutputStream, true, StandardCharsets.UTF_8); + this.blobOutputStream = mew 
BufferedOutputStream(appendBlobClient.getBlobOutputStream(), config.getOutputStreamBufferSize()); + this.printWriter = new PrintWriter(blobOutputStream, false, StandardCharsets.UTF_8); } @Override From ba8f30d97f4221769aa76298cc9b0d2b48494b4f Mon Sep 17 00:00:00 2001 From: Brian Matt Date: Wed, 29 Dec 2021 09:16:06 -0500 Subject: [PATCH 02/12] Typo fix --- .../azure_blob_storage/jsonl/AzureBlobStorageJsonlWriter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/jsonl/AzureBlobStorageJsonlWriter.java b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/jsonl/AzureBlobStorageJsonlWriter.java index c198e8edbd36..6a0406be7a7e 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/jsonl/AzureBlobStorageJsonlWriter.java +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/jsonl/AzureBlobStorageJsonlWriter.java @@ -42,7 +42,7 @@ public AzureBlobStorageJsonlWriter(final AzureBlobStorageDestinationConfig confi final boolean isNewlyCreatedBlob) { super(config, appendBlobClient, configuredStream); // at this moment we already receive appendBlobClient initialized - this.blobOutputStream = mew BufferedOutputStream(appendBlobClient.getBlobOutputStream(), config.getOutputStreamBufferSize()); + this.blobOutputStream = new BufferedOutputStream(appendBlobClient.getBlobOutputStream(), config.getOutputStreamBufferSize()); this.printWriter = new PrintWriter(blobOutputStream, false, StandardCharsets.UTF_8); } From 4122bf3f11fc4112fe60297d408d9c5ace07e45d Mon Sep 17 00:00:00 2001 From: Brian Matt Date: Wed, 29 Dec 2021 09:18:44 -0500 Subject: [PATCH 03/12] Fixed null comparison for int
type --- .../azure_blob_storage/AzureBlobStorageDestinationConfig.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConfig.java b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConfig.java index 9740c7d5ef32..2577b64262a0 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConfig.java +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConfig.java @@ -64,7 +64,7 @@ public static AzureBlobStorageDestinationConfig getAzureBlobStorageConfig(final .get("azure_blob_storage_endpoint_domain_name"); final JsonNode containerName = config.get("azure_blob_storage_container_name"); final int outputStreamBufferSizeFromConfig = config.get("azure_blob_storage_output_buffer_size").asInt(); - if (outputStreamBufferSizeFromConfig == null) { + if (outputStreamBufferSizeFromConfig == 0) { // Default to 100MB buffer size outputStreamBufferSizeFromConfig = 1024 * 1024 * 100; } From 3227f05e9585e742acdcee7dea9532d281d4b831 Mon Sep 17 00:00:00 2001 From: Brian Matt Date: Wed, 29 Dec 2021 10:16:23 -0500 Subject: [PATCH 04/12] Added default constant for buffer size. Fixed how output buffer size is pulled for backwards compatibility. Added title for account key. 
--- .../AzureBlobStorageDestinationConfig.java | 17 +++++++++++------ .../AzureBlobStorageDestinationConstants.java | 1 + .../src/main/resources/spec.json | 7 +++++++ 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConfig.java b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConfig.java index 2577b64262a0..b227534f6fc6 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConfig.java +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConfig.java @@ -29,7 +29,12 @@ public AzureBlobStorageDestinationConfig( this.accountName = accountName; this.accountKey = accountKey; this.containerName = containerName; - this.outputStreamBufferSize = outputStreamBufferSize; + if (outputStreamBufferSize > 0) { + this.outputStreamBufferSize = outputStreamBufferSize; + } else { + // Handle input of 0 on settings form + this.outputStreamBufferSize = DEFAULT_STORAGE_OUTPUT_BUFFER_SIZE; + } this.formatConfig = formatConfig; } @@ -63,11 +68,11 @@ public static AzureBlobStorageDestinationConfig getAzureBlobStorageConfig(final final JsonNode endpointFromConfig = config .get("azure_blob_storage_endpoint_domain_name"); final JsonNode containerName = config.get("azure_blob_storage_container_name"); - final int outputStreamBufferSizeFromConfig = config.get("azure_blob_storage_output_buffer_size").asInt(); - if (outputStreamBufferSizeFromConfig == 0) { - // Default to 100MB buffer size - outputStreamBufferSizeFromConfig = 1024 * 1024 * 100; - } + final int outputStreamBufferSizeFromConfig 
= + config.get("azure_blob_storage_output_buffer_size") != null + ? config.get("azure_blob_storage_output_buffer_size").asInt(DEFAULT_STORAGE_OUTPUT_BUFFER_SIZE) + : DEFAULT_STORAGE_OUTPUT_BUFFER_SIZE; + final JsonNode blobName = config.get("azure_blob_storage_blob_name"); // streamId final String endpointComputed = String.format(Locale.ROOT, DEFAULT_STORAGE_ENDPOINT_FORMAT, diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConstants.java b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConstants.java index 1cd15481997c..737bb5f63422 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConstants.java +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConstants.java @@ -10,6 +10,7 @@ public final class AzureBlobStorageDestinationConstants { public static final String DEFAULT_STORAGE_ENDPOINT_HTTP_PROTOCOL = "https"; public static final String DEFAULT_STORAGE_ENDPOINT_DOMAIN_NAME = "blob.core.windows.net"; public static final String DEFAULT_STORAGE_ENDPOINT_FORMAT = "%s://%s.%s"; + public static final int DEFAULT_STORAGE_OUTPUT_BUFFER_SIZE = 1024 * 1024 * 100; // 100MB private AzureBlobStorageDestinationConstants() {} diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/resources/spec.json index b3b7c6ea7897..e8339fa6ee5b 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/resources/spec.json +++ 
b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/resources/spec.json @@ -35,6 +35,7 @@ "examples": ["airbyte5storage"] }, "azure_blob_storage_account_key": { + "title": "Azure Blob Storage account key", "description": "The Azure blob storage account key.", "airbyte_secret": true, "type": "string", @@ -42,6 +43,12 @@ "Z8ZkZpteggFx394vm+PJHnGTvdRncaYS+JhLKdj789YNmD+iyGTnG+PV+POiuYNhBg/ACS+LKjd%4FG3FHGN12Nd==" ] }, + "azure_blob_storage_output_buffer_size": { + "title": "Azure Blob Storage output buffer size, in bytes.", + "type": "integer", + "description": "The amount of bytes to buffer for the output stream to Azure. This will impact memory footprint on workers, but may need adjustment for performance and appropriate block size in Azure.", + "examples": [104857600] + }, "format": { "title": "Output Format", "type": "object", From 51e0c8dfdff2fb54f4fd263ff758bfc5d3ab3c2f Mon Sep 17 00:00:00 2001 From: Brian Matt Date: Wed, 29 Dec 2021 10:25:20 -0500 Subject: [PATCH 05/12] ticked up version in Dockerfile --- .../connectors/destination-azure-blob-storage/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/Dockerfile b/airbyte-integrations/connectors/destination-azure-blob-storage/Dockerfile index a33b5ab5272d..8e644aa025fe 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/Dockerfile +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-azure-blob-storage COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.version=0.1.1 LABEL io.airbyte.name=airbyte/destination-azure-blob-storage From ef031e4dc16cb42f2b97617b4f15fd17ea8251fa Mon Sep 17 00:00:00 2001 From: Brian Matt Date: Wed, 29 Dec 2021 10:51:50 -0500 Subject: [PATCH 06/12] Documentation updates --- .../destination-azure-blob-storage/src/main/resources/spec.json | 2 +- 
docs/integrations/destinations/azureblobstorage.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/resources/spec.json index e8339fa6ee5b..f643bddea24a 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/resources/spec.json @@ -44,7 +44,7 @@ ] }, "azure_blob_storage_output_buffer_size": { - "title": "Azure Blob Storage output buffer size, in bytes.", + "title": "Azure Blob Storage output buffer size", "type": "integer", "description": "The amount of bytes to buffer for the output stream to Azure. This will impact memory footprint on workers, but may need adjustment for performance and appropriate block size in Azure.", "examples": [104857600] diff --git a/docs/integrations/destinations/azureblobstorage.md b/docs/integrations/destinations/azureblobstorage.md index 8c8c29b26068..830cc7821ab6 100644 --- a/docs/integrations/destinations/azureblobstorage.md +++ b/docs/integrations/destinations/azureblobstorage.md @@ -22,6 +22,7 @@ The Airbyte Azure Blob Storage destination allows you to sync data to Azure Blob | Azure blob storage container \(Bucket\) Name | string | A name of the Azure blob storage container. If not exists - will be created automatically. If leave empty, then will be created automatically airbytecontainer+timestamp. | | Azure Blob Storage account name | string | The account's name of the Azure Blob Storage. | | The Azure blob storage account key | string | Azure blob storage account key. Example: `abcdefghijklmnopqrstuvwxyz/0123456789+ABCDEFGHIJKLMNOPQRSTUVWXYZ/0123456789%++sampleKey==`. | +| Azure Blob Storage output buffer size | integer | Azure Blob Storage output buffer size, in bytes. 
Exmaple: 104857600 | | Format | object | Format specific configuration. See below for details. | ⚠️ Please note that under "Full Refresh Sync" mode, data in the configured blob will be wiped out before each sync. We recommend you to provision a dedicated Azure Blob Storage Container resource for this sync to prevent unexpected data deletion from misconfiguration. ⚠️ @@ -137,4 +138,5 @@ They will be like this in the output file: | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | | 0.1.0 | 2021-08-30 | [\#5332](https://github.com/airbytehq/airbyte/pull/5332) | Initial release with JSONL and CSV output. | +| 0.1.1 | 2021-12-29 | | Added BufferedOutputStream wrapper to blob output stream to improve performance and fix issues with 50,000 block limit. Also disabled autoflush on PrintWriter. | From ddcd67c959039618d91d294d6bfbf5fc182fc1cb Mon Sep 17 00:00:00 2001 From: Brian Matt Date: Wed, 29 Dec 2021 10:55:09 -0500 Subject: [PATCH 07/12] Added PR link to changelog --- docs/integrations/destinations/azureblobstorage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/integrations/destinations/azureblobstorage.md b/docs/integrations/destinations/azureblobstorage.md index 830cc7821ab6..3d9213351cd0 100644 --- a/docs/integrations/destinations/azureblobstorage.md +++ b/docs/integrations/destinations/azureblobstorage.md @@ -138,5 +138,5 @@ They will be like this in the output file: | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | | 0.1.0 | 2021-08-30 | [\#5332](https://github.com/airbytehq/airbyte/pull/5332) | Initial release with JSONL and CSV output. | -| 0.1.1 | 2021-12-29 | | Added BufferedOutputStream wrapper to blob output stream to improve performance and fix issues with 50,000 block limit. Also disabled autoflush on PrintWriter. 
| +| 0.1.1 | 2021-12-29 | [\#5332](https://github.com/airbytehq/airbyte/pull/9190) | Added BufferedOutputStream wrapper to blob output stream to improve performance and fix issues with 50,000 block limit. Also disabled autoflush on PrintWriter. | From 6fc7b97ef0e08734343db398cd4177f3e5b6b2c4 Mon Sep 17 00:00:00 2001 From: bmatticus Date: Tue, 4 Jan 2022 09:29:25 -0500 Subject: [PATCH 08/12] Fixed typo in doc --- docs/integrations/destinations/azureblobstorage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/integrations/destinations/azureblobstorage.md b/docs/integrations/destinations/azureblobstorage.md index 3d9213351cd0..e64d01600854 100644 --- a/docs/integrations/destinations/azureblobstorage.md +++ b/docs/integrations/destinations/azureblobstorage.md @@ -22,7 +22,7 @@ The Airbyte Azure Blob Storage destination allows you to sync data to Azure Blob | Azure blob storage container \(Bucket\) Name | string | A name of the Azure blob storage container. If not exists - will be created automatically. If leave empty, then will be created automatically airbytecontainer+timestamp. | | Azure Blob Storage account name | string | The account's name of the Azure Blob Storage. | | The Azure blob storage account key | string | Azure blob storage account key. Example: `abcdefghijklmnopqrstuvwxyz/0123456789+ABCDEFGHIJKLMNOPQRSTUVWXYZ/0123456789%++sampleKey==`. | -| Azure Blob Storage output buffer size | integer | Azure Blob Storage output buffer size, in bytes. Exmaple: 104857600 | +| Azure Blob Storage output buffer size | integer | Azure Blob Storage output buffer size, in bytes. Example: 104857600 | | Format | object | Format specific configuration. See below for details. | ⚠️ Please note that under "Full Refresh Sync" mode, data in the configured blob will be wiped out before each sync. We recommend you to provision a dedicated Azure Blob Storage Container resource for this sync to prevent unexpected data deletion from misconfiguration. 
⚠️ From fa4fbcadbf08128ce7fd957203bfd7f46389200d Mon Sep 17 00:00:00 2001 From: bmatticus Date: Tue, 4 Jan 2022 09:42:01 -0500 Subject: [PATCH 09/12] Added min/max/default to buffer size --- .../src/main/resources/spec.json | 3 +++ 1 file changed, 3 insertions(+) diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/resources/spec.json index f643bddea24a..2b6d7b40e0d4 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/resources/spec.json @@ -47,6 +47,9 @@ "title": "Azure Blob Storage output buffer size", "type": "integer", "description": "The amount of bytes to buffer for the output stream to Azure. This will impact memory footprint on workers, but may need adjustment for performance and appropriate block size in Azure.", + "minimum": 1, + "maximum": 2147483647, + "default": 104857600, "examples": [104857600] }, "format": { From 9c8924671241e946eb28d5eaa8cb9b528e41d189 Mon Sep 17 00:00:00 2001 From: bmatt <36943357+bmatticus@users.noreply.github.com> Date: Tue, 4 Jan 2022 13:26:51 -0500 Subject: [PATCH 10/12] Update docs/integrations/destinations/azureblobstorage.md Co-authored-by: Augustin --- docs/integrations/destinations/azureblobstorage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/integrations/destinations/azureblobstorage.md b/docs/integrations/destinations/azureblobstorage.md index e64d01600854..d815fc65e0ce 100644 --- a/docs/integrations/destinations/azureblobstorage.md +++ b/docs/integrations/destinations/azureblobstorage.md @@ -138,5 +138,5 @@ They will be like this in the output file: | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | | 0.1.0 | 2021-08-30 | [\#5332](https://github.com/airbytehq/airbyte/pull/5332) | Initial release with JSONL and CSV 
output. | -| 0.1.1 | 2021-12-29 | [\#5332](https://github.com/airbytehq/airbyte/pull/9190) | Added BufferedOutputStream wrapper to blob output stream to improve performance and fix issues with 50,000 block limit. Also disabled autoflush on PrintWriter. | +| 0.1.1 | 2021-12-29 | [\#9190](https://github.com/airbytehq/airbyte/pull/9190) | Added BufferedOutputStream wrapper to blob output stream to improve performance and fix issues with 50,000 block limit. Also disabled autoflush on PrintWriter. | From 2a72d3fab97e04a7213863e12371bd1513060aee Mon Sep 17 00:00:00 2001 From: bmatticus Date: Tue, 4 Jan 2022 13:27:10 -0500 Subject: [PATCH 11/12] Version updates --- .../b4c5d105-31fd-4817-96b6-cb923bfc04cb.json | 2 +- .../init/src/main/resources/seed/destination_definitions.yaml | 2 +- docs/integrations/destinations/azureblobstorage.md | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/b4c5d105-31fd-4817-96b6-cb923bfc04cb.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/b4c5d105-31fd-4817-96b6-cb923bfc04cb.json index 4582e3038295..3c436bbd29d6 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/b4c5d105-31fd-4817-96b6-cb923bfc04cb.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/b4c5d105-31fd-4817-96b6-cb923bfc04cb.json @@ -2,7 +2,7 @@ "destinationDefinitionId": "b4c5d105-31fd-4817-96b6-cb923bfc04cb", "name": "Azure Blob Storage", "dockerRepository": "airbyte/destination-azure-blob-storage", - "dockerImageTag": "0.1.0", + "dockerImageTag": "0.1.1", "documentationUrl": "https://docs.airbyte.io/integrations/destinations/azureblobstorage", "icon": "azureblobstorage.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index eaae8e1430c8..b8c923039369 
100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -1,7 +1,7 @@ - name: Azure Blob Storage destinationDefinitionId: b4c5d105-31fd-4817-96b6-cb923bfc04cb dockerRepository: airbyte/destination-azure-blob-storage - dockerImageTag: 0.1.0 + dockerImageTag: 0.1.1 documentationUrl: https://docs.airbyte.io/integrations/destinations/azureblobstorage icon: azureblobstorage.svg - name: Amazon SQS diff --git a/docs/integrations/destinations/azureblobstorage.md b/docs/integrations/destinations/azureblobstorage.md index e64d01600854..f96942aad725 100644 --- a/docs/integrations/destinations/azureblobstorage.md +++ b/docs/integrations/destinations/azureblobstorage.md @@ -137,6 +137,7 @@ They will be like this in the output file: | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | -| 0.1.0 | 2021-08-30 | [\#5332](https://github.com/airbytehq/airbyte/pull/5332) | Initial release with JSONL and CSV output. | | 0.1.1 | 2021-12-29 | [\#5332](https://github.com/airbytehq/airbyte/pull/9190) | Added BufferedOutputStream wrapper to blob output stream to improve performance and fix issues with 50,000 block limit. Also disabled autoflush on PrintWriter. | +| 0.1.0 | 2021-08-30 | [\#5332](https://github.com/airbytehq/airbyte/pull/5332) | Initial release with JSONL and CSV output. | + From d02c9b0fc38591fb6b21f50f31fe924b4b6e66ab Mon Sep 17 00:00:00 2001 From: bmatticus Date: Mon, 10 Jan 2022 08:19:28 -0500 Subject: [PATCH 12/12] Converted buffer size input to megabytes rather than bytes. Removed test for zero now that spec limits input from 1 to 2047. Cut off at 2047 as 2048 megabytes would be one byte too high for buffer (signed 32 bit int). 
--- .../AzureBlobStorageDestinationConfig.java | 10 +++------- .../src/main/resources/spec.json | 8 ++++---- docs/integrations/destinations/azureblobstorage.md | 2 +- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConfig.java b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConfig.java index b227534f6fc6..8d575214b678 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConfig.java +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConfig.java @@ -29,12 +29,7 @@ public AzureBlobStorageDestinationConfig( this.accountName = accountName; this.accountKey = accountKey; this.containerName = containerName; - if (outputStreamBufferSize > 0) { - this.outputStreamBufferSize = outputStreamBufferSize; - } else { - // Handle input of 0 on settings form - this.outputStreamBufferSize = DEFAULT_STORAGE_OUTPUT_BUFFER_SIZE; - } + this.outputStreamBufferSize = outputStreamBufferSize; this.formatConfig = formatConfig; } @@ -59,7 +54,8 @@ public AzureBlobStorageFormatConfig getFormatConfig() { } public int getOutputStreamBufferSize() { - return outputStreamBufferSize; + // Convert from MB to Bytes + return outputStreamBufferSize * 1024 * 1024; } public static AzureBlobStorageDestinationConfig getAzureBlobStorageConfig(final JsonNode config) { diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/resources/spec.json index 2b6d7b40e0d4..efe65cb62d8f 
100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/resources/spec.json @@ -46,11 +46,11 @@ "azure_blob_storage_output_buffer_size": { "title": "Azure Blob Storage output buffer size", "type": "integer", - "description": "The amount of bytes to buffer for the output stream to Azure. This will impact memory footprint on workers, but may need adjustment for performance and appropriate block size in Azure.", + "description": "The amount of megabytes to buffer for the output stream to Azure. This will impact memory footprint on workers, but may need adjustment for performance and appropriate block size in Azure.", "minimum": 1, - "maximum": 2147483647, - "default": 104857600, - "examples": [104857600] + "maximum": 2047, + "default": 5, + "examples": [5] }, "format": { "title": "Output Format", diff --git a/docs/integrations/destinations/azureblobstorage.md b/docs/integrations/destinations/azureblobstorage.md index f96942aad725..94809438f453 100644 --- a/docs/integrations/destinations/azureblobstorage.md +++ b/docs/integrations/destinations/azureblobstorage.md @@ -22,7 +22,7 @@ The Airbyte Azure Blob Storage destination allows you to sync data to Azure Blob | Azure blob storage container \(Bucket\) Name | string | A name of the Azure blob storage container. If not exists - will be created automatically. If leave empty, then will be created automatically airbytecontainer+timestamp. | | Azure Blob Storage account name | string | The account's name of the Azure Blob Storage. | | The Azure blob storage account key | string | Azure blob storage account key. Example: `abcdefghijklmnopqrstuvwxyz/0123456789+ABCDEFGHIJKLMNOPQRSTUVWXYZ/0123456789%++sampleKey==`. | -| Azure Blob Storage output buffer size | integer | Azure Blob Storage output buffer size, in bytes. 
Example: 104857600 | +| Azure Blob Storage output buffer size | integer | Azure Blob Storage output buffer size, in megabytes. Example: 5 | | Format | object | Format specific configuration. See below for details. | ⚠️ Please note that under "Full Refresh Sync" mode, data in the configured blob will be wiped out before each sync. We recommend you to provision a dedicated Azure Blob Storage Container resource for this sync to prevent unexpected data deletion from misconfiguration. ⚠️