From 10d7bea363c121fbaaf4c602c5ca455e704b3076 Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants <36314070+artem1205@users.noreply.github.com> Date: Wed, 4 Jan 2023 20:58:42 +0100 Subject: [PATCH] Source File: Fix special characters bug (#21012) * Source File: Fix special characters bug * Source File: bump version; update docs * Source File: update SSH credentials in unit tests * Source File: bump version (secure) * Source File: update resources manually --- .../src/main/resources/seed/source_definitions.yaml | 2 +- .../init/src/main/resources/seed/source_specs.yaml | 2 +- .../connectors/source-file-secure/Dockerfile | 2 +- airbyte-integrations/connectors/source-file/Dockerfile | 2 +- .../source-file/integration_tests/conftest.py | 8 ++++---- .../source-file/integration_tests/docker-compose.yml | 2 +- .../connectors/source-file/source_file/client.py | 10 +++++++--- docs/integrations/sources/file.md | 3 ++- 8 files changed, 18 insertions(+), 13 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 4d7cf7614c44..e1878fa79a31 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -489,7 +489,7 @@ - name: File sourceDefinitionId: 778daa7c-feaf-4db6-96f3-70fd645acc77 dockerRepository: airbyte/source-file - dockerImageTag: 0.2.32 + dockerImageTag: 0.2.33 documentationUrl: https://docs.airbyte.com/integrations/sources/file icon: file.svg sourceType: file diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 57cceb212151..e8193a79216a 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -4041,7 +4041,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-file:0.2.32" +- dockerImage: "airbyte/source-file:0.2.33" spec: documentationUrl: "https://docs.airbyte.com/integrations/sources/file" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-file-secure/Dockerfile b/airbyte-integrations/connectors/source-file-secure/Dockerfile index 69335fdec330..9d4fcd94fb3e 100644 --- a/airbyte-integrations/connectors/source-file-secure/Dockerfile +++ b/airbyte-integrations/connectors/source-file-secure/Dockerfile @@ -9,5 +9,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.32 +LABEL io.airbyte.version=0.2.33 LABEL io.airbyte.name=airbyte/source-file-secure diff --git a/airbyte-integrations/connectors/source-file/Dockerfile b/airbyte-integrations/connectors/source-file/Dockerfile index 4fc731577150..414ef8cd1979 100644 --- a/airbyte-integrations/connectors/source-file/Dockerfile +++ b/airbyte-integrations/connectors/source-file/Dockerfile @@ -17,5 +17,5 @@ COPY source_file ./source_file ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.32 +LABEL io.airbyte.version=0.2.33 LABEL io.airbyte.name=airbyte/source-file diff --git a/airbyte-integrations/connectors/source-file/integration_tests/conftest.py b/airbyte-integrations/connectors/source-file/integration_tests/conftest.py index 5d156ade8c3c..cec1079525af 100644 --- a/airbyte-integrations/connectors/source-file/integration_tests/conftest.py +++ b/airbyte-integrations/connectors/source-file/integration_tests/conftest.py @@ -73,7 +73,7 @@ def is_ssh_ready(ip, port): ip, port=port, username="user1", - password="pass1", + password="abc123@456#", ) return True except (SSHException, socket.error): @@ -93,9 +93,9 @@ def ssh_service(docker_ip, docker_services): def provider_config(ssh_service): def lookup(name): providers = { - "ssh": dict(storage="SSH", host=ssh_service, user="user1", password="pass1", port=2222), - "scp": dict(storage="SCP", host=ssh_service, user="user1", password="pass1", port=2222), - "sftp": dict(storage="SFTP", host=ssh_service, user="user1", password="pass1", port=100), + "ssh": dict(storage="SSH", host=ssh_service, user="user1", password="abc123@456#", port=2222), + "scp": dict(storage="SCP", host=ssh_service, user="user1", password="abc123@456#", port=2222), + "sftp": dict(storage="SFTP", host=ssh_service, user="user1", password="abc123@456#", port=100), "gcs": dict(storage="GCS"), "s3": dict(storage="S3"), "azure": dict(storage="AzBlob"), diff --git a/airbyte-integrations/connectors/source-file/integration_tests/docker-compose.yml b/airbyte-integrations/connectors/source-file/integration_tests/docker-compose.yml index a9ec97ba6b95..67306a1e102e 100644 --- a/airbyte-integrations/connectors/source-file/integration_tests/docker-compose.yml +++ b/airbyte-integrations/connectors/source-file/integration_tests/docker-compose.yml @@ -6,4 +6,4 @@ services: - "2222:22" volumes: - ./sample_files:/home/user1/files - command: user1:pass1:1001 + command: user1:abc123@456#:1001 diff --git a/airbyte-integrations/connectors/source-file/source_file/client.py b/airbyte-integrations/connectors/source-file/source_file/client.py index 2e306461b01f..ec6e90877094 100644 --- a/airbyte-integrations/connectors/source-file/source_file/client.py +++ b/airbyte-integrations/connectors/source-file/source_file/client.py @@ -6,6 +6,7 @@ import json import tempfile import traceback +import urllib from os import environ from typing import Iterable from urllib.parse import urlparse @@ -108,8 +109,11 @@ def _open(self): port = self._provider["port"] return smart_open.open(f"webhdfs://{host}:{port}/{url}", **self.args) elif storage in ("ssh://", "scp://", "sftp://"): - user = self._provider["user"] - host = self._provider["host"] + # We need to quote parameters to deal with special characters + # https://bugs.python.org/issue18140 + user = urllib.parse.quote(self._provider["user"]) + host = urllib.parse.quote(self._provider["host"]) + url = urllib.parse.quote(url) # TODO: Remove int casting when https://github.com/airbytehq/airbyte/issues/4952 is addressed # TODO: The "port" field in spec.json must also be changed _port_value = self._provider.get("port", 22) @@ -120,7 +124,7 @@ def _open(self): # Explicitly turn off ssh keys stored in ~/.ssh transport_params = {"connect_kwargs": {"look_for_keys": False}, "timeout": SSH_TIMEOUT} if "password" in self._provider: - password = self._provider["password"] + password = urllib.parse.quote(self._provider["password"]) uri = f"{storage}{user}:{password}@{host}:{port}/{url}" else: uri = f"{storage}{user}@{host}:{port}/{url}" diff --git a/docs/integrations/sources/file.md b/docs/integrations/sources/file.md index 49fcb5f480b9..7be23ba782ae 100644 --- a/docs/integrations/sources/file.md +++ b/docs/integrations/sources/file.md @@ -138,7 +138,8 @@ In order to read large files from a remote location, this connector uses the [sm ## Changelog | Version | Date | Pull Request | Subject | -|:--------|:-----------| :------------------------------------------------------- |:---------------------------------------------------------| +|:--------|:-----------|:---------------------------------------------------------|:---------------------------------------------------------| +| 0.2.33 | 2023-01-04 | [21012](https://github.com/airbytehq/airbyte/pull/21012) | Fix special characters bug | | 0.2.32 | 2022-12-21 | [20740](https://github.com/airbytehq/airbyte/pull/20740) | Source File: increase SSH timeout to 60s | | 0.2.31 | 2022-11-17 | [19567](https://github.com/airbytehq/airbyte/pull/19567) | Source File: bump 0.2.31 | | 0.2.30 | 2022-11-10 | [19222](https://github.com/airbytehq/airbyte/pull/19222) | Use AirbyteConnectionStatus for "check" command |