From 71d64a55b8e5ee1ffa2caf1149c78a614f96b62a Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk <35109939+tolik0@users.noreply.github.com> Date: Tue, 6 Feb 2024 11:45:58 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Source=20S3:=20Add=20region=20to=20?= =?UTF-8?q?S3=20source=20(#34842)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Håkon Åmdal --- .../source-s3/integration_tests/cloud_spec.json | 16 ++++++++++++++-- .../source-s3/integration_tests/spec.json | 16 ++++++++++++++-- .../connectors/source-s3/metadata.yaml | 2 +- .../connectors/source-s3/source_s3/source.py | 10 ++++++++-- .../connectors/source-s3/source_s3/v4/config.py | 7 +++++++ .../source-s3/source_s3/v4/stream_reader.py | 4 ++++ docs/integrations/sources/s3.md | 3 ++- 7 files changed, 50 insertions(+), 8 deletions(-) diff --git a/airbyte-integrations/connectors/source-s3/integration_tests/cloud_spec.json b/airbyte-integrations/connectors/source-s3/integration_tests/cloud_spec.json index ed084d3b08d3..3d0f3a14fc07 100644 --- a/airbyte-integrations/connectors/source-s3/integration_tests/cloud_spec.json +++ b/airbyte-integrations/connectors/source-s3/integration_tests/cloud_spec.json @@ -380,6 +380,12 @@ "order": 4, "type": "string" }, + "region_name": { + "title": "AWS Region", + "description": "AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.", + "order": 5, + "type": "string" + }, "dataset": { "title": "Output Stream Name", "description": "Deprecated and will be removed soon. Please do not use this field anymore and use streams.name instead. The name of the stream you would like this source to output. Can contain letters, numbers, or underscores.", @@ -620,7 +626,7 @@ "title": "AWS Role ARN", "description": "Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.", "always_show": true, - "order": 6, + "order": 7, "type": "string" }, "path_prefix": { @@ -637,13 +643,19 @@ "order": 4, "type": "string" }, + "region_name": { + "title": "AWS Region", + "description": "AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.", + "order": 5, + "type": "string" + }, "start_date": { "title": "Start Date", "description": "UTC date and time in the format 2017-01-25T00:00:00Z. Any file modified before this date will not be replicated.", "examples": ["2021-01-01T00:00:00Z"], "format": "date-time", "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$", - "order": 5, + "order": 6, "type": "string" } }, diff --git a/airbyte-integrations/connectors/source-s3/integration_tests/spec.json b/airbyte-integrations/connectors/source-s3/integration_tests/spec.json index 76a48ffb09a5..8592fc5684ef 100644 --- a/airbyte-integrations/connectors/source-s3/integration_tests/spec.json +++ b/airbyte-integrations/connectors/source-s3/integration_tests/spec.json @@ -379,6 +379,12 @@ "order": 4, "type": "string" }, + "region_name": { + "title": "AWS Region", + "description": "AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.", + "order": 5, + "type": "string" + }, "dataset": { "title": "Output Stream Name", "description": "Deprecated and will be removed soon. Please do not use this field anymore and use streams.name instead. The name of the stream you would like this source to output. Can contain letters, numbers, or underscores.", @@ -619,7 +625,7 @@ "title": "AWS Role ARN", "description": "Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.", "always_show": true, - "order": 6, + "order": 7, "type": "string" }, "path_prefix": { @@ -636,13 +642,19 @@ "order": 4, "type": "string" }, + "region_name": { + "title": "AWS Region", + "description": "AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.", + "order": 5, + "type": "string" + }, "start_date": { "title": "Start Date", "description": "UTC date and time in the format 2017-01-25T00:00:00Z. Any file modified before this date will not be replicated.", "examples": ["2021-01-01T00:00:00Z"], "format": "date-time", "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$", - "order": 5, + "order": 6, "type": "string" } }, diff --git a/airbyte-integrations/connectors/source-s3/metadata.yaml b/airbyte-integrations/connectors/source-s3/metadata.yaml index 6bc8379f6be7..71aa5fac4cc3 100644 --- a/airbyte-integrations/connectors/source-s3/metadata.yaml +++ b/airbyte-integrations/connectors/source-s3/metadata.yaml @@ -10,7 +10,7 @@ data: connectorSubtype: file connectorType: source definitionId: 69589781-7828-43c5-9f63-8925b1c1ccc2 - dockerImageTag: 4.5.0 + dockerImageTag: 4.5.1 dockerRepository: airbyte/source-s3 documentationUrl: https://docs.airbyte.com/integrations/sources/s3 githubIssueLabel: source-s3 diff --git a/airbyte-integrations/connectors/source-s3/source_s3/source.py b/airbyte-integrations/connectors/source-s3/source_s3/source.py index 224f7b036e4a..3fe19247578f 100644 --- a/airbyte-integrations/connectors/source-s3/source_s3/source.py +++ b/airbyte-integrations/connectors/source-s3/source_s3/source.py @@ -43,7 +43,7 @@ class Config: description="Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations " f"requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.", always_show=True, - order=6, + order=7, ) path_prefix: str = Field( default="", @@ -54,13 +54,19 @@ class Config: ) endpoint: str = Field("", description="Endpoint to an S3 compatible service. Leave empty to use AWS.", order=4) + region_name: Optional[str] = Field( + title="AWS Region", + default=None, + description="AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.", + order=5, + ) start_date: Optional[str] = Field( title="Start Date", description="UTC date and time in the format 2017-01-25T00:00:00Z. Any file modified before this date will not be replicated.", examples=["2021-01-01T00:00:00Z"], format="date-time", pattern="^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$", - order=5, + order=6, ) provider: S3Provider diff --git a/airbyte-integrations/connectors/source-s3/source_s3/v4/config.py b/airbyte-integrations/connectors/source-s3/source_s3/v4/config.py index 55c3b5708f59..349377e069ad 100644 --- a/airbyte-integrations/connectors/source-s3/source_s3/v4/config.py +++ b/airbyte-integrations/connectors/source-s3/source_s3/v4/config.py @@ -55,6 +55,13 @@ def documentation_url(cls) -> AnyUrl: order=4, ) + region_name: Optional[str] = Field( + title="AWS Region", + default=None, + description="AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.", + order=5, + ) + @root_validator def validate_optional_args(cls, values): aws_access_key_id = values.get("aws_access_key_id") diff --git a/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py b/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py index 0457dba4ee36..d914690ee70c 100644 --- a/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py +++ b/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py @@ -58,6 +58,10 @@ def s3_client(self) -> BaseClient: if self._s3_client is None: client_kv_args = _get_s3_compatible_client_args(self.config) if self.config.endpoint else {} + # Set the region_name if it's provided in the config + if self.config.region_name: + client_kv_args["region_name"] = self.config.region_name + if self.config.role_arn: self._s3_client = self._get_iam_s3_client(client_kv_args) else: diff --git a/docs/integrations/sources/s3.md b/docs/integrations/sources/s3.md index 4118b968367d..85c18ab945d9 100644 --- a/docs/integrations/sources/s3.md +++ b/docs/integrations/sources/s3.md @@ -321,9 +321,10 @@ To perform the text extraction from PDF and Docx files, the connector uses the [ | Version | Date | Pull Request | Subject | |:--------|:-----------|:----------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------| +| 4.5.1 | 2024-02-02 | [31701](https://github.com/airbytehq/airbyte/pull/31701) | Add `region` support | | 4.5.0 | 2024-02-01 | [34591](https://github.com/airbytehq/airbyte/pull/34591) | Run full refresh syncs concurrently | | 4.4.1 | 2024-01-30 | [34665](https://github.com/airbytehq/airbyte/pull/34665) | Pin moto & CDK version | -| 4.4.0 | 2023-01-12 | [33818](https://github.com/airbytehq/airbyte/pull/33818) | Add IAM Role Authentication | +| 4.4.0 | 2024-01-12 | [33818](https://github.com/airbytehq/airbyte/pull/33818) | Add IAM Role Authentication | | 4.3.1 | 2024-01-04 | [33937](https://github.com/airbytehq/airbyte/pull/33937) | Prepare for airbyte-lib | | 4.3.0 | 2023-12-14 | [33411](https://github.com/airbytehq/airbyte/pull/33411) | Bump CDK version to auto-set primary key for document file streams and support raw txt files | | 4.2.4 | 2023-12-06 | [33187](https://github.com/airbytehq/airbyte/pull/33187) | Bump CDK version to hide source-defined primary key |