diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 66ff8dc38a80..635578502f0b 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -601,11 +601,14 @@ - name: File (CSV, JSON, Excel, Feather, Parquet) sourceDefinitionId: 778daa7c-feaf-4db6-96f3-70fd645acc77 dockerRepository: airbyte/source-file - dockerImageTag: 0.2.34 + dockerImageTag: 0.2.35 documentationUrl: https://docs.airbyte.com/integrations/sources/file icon: file.svg sourceType: file releaseStage: generally_available + allowedHosts: + hosts: + - "*" - name: Firebase Realtime Database sourceDefinitionId: acb5f973-a565-441e-992f-4946f3e65662 dockerRepository: airbyte/source-firebase-realtime-database diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 7c3ad932c17e..a867a4f709be 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -4358,7 +4358,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-file:0.2.34" +- dockerImage: "airbyte/source-file:0.2.35" spec: documentationUrl: "https://docs.airbyte.com/integrations/sources/file" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-file-secure/Dockerfile b/airbyte-integrations/connectors/source-file-secure/Dockerfile index 54b2a4ea95dd..c6606447d048 100644 --- a/airbyte-integrations/connectors/source-file-secure/Dockerfile +++ b/airbyte-integrations/connectors/source-file-secure/Dockerfile @@ -1,4 +1,4 @@ -FROM airbyte/source-file:0.2.34 +FROM airbyte/source-file:0.2.35 WORKDIR /airbyte/integration_code COPY source_file_secure ./source_file_secure @@ -9,5 +9,5 @@ RUN pip install . 
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.34 +LABEL io.airbyte.version=0.2.35 LABEL io.airbyte.name=airbyte/source-file-secure diff --git a/airbyte-integrations/connectors/source-file/Dockerfile b/airbyte-integrations/connectors/source-file/Dockerfile index bc428b63645f..b0fbaddd1662 100644 --- a/airbyte-integrations/connectors/source-file/Dockerfile +++ b/airbyte-integrations/connectors/source-file/Dockerfile @@ -17,5 +17,5 @@ COPY source_file ./source_file ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.34 +LABEL io.airbyte.version=0.2.35 LABEL io.airbyte.name=airbyte/source-file diff --git a/airbyte-integrations/connectors/source-file/integration_tests/client_storage_providers_test.py b/airbyte-integrations/connectors/source-file/integration_tests/client_storage_providers_test.py index d980e953b12a..a1d70ec923b5 100644 --- a/airbyte-integrations/connectors/source-file/integration_tests/client_storage_providers_test.py +++ b/airbyte-integrations/connectors/source-file/integration_tests/client_storage_providers_test.py @@ -59,7 +59,7 @@ def test__read_file_not_found(provider_config, provider_name, file_path, file_fo ) def test__streams_from_ssh_providers(provider_config, provider_name, file_path, file_format): client = Client(dataset_name="output", format=file_format, url=file_path, provider=provider_config(provider_name)) - streams = list(client.streams) + streams = list(client.streams()) assert len(streams) == 1 assert streams[0].json_schema["properties"] == { "header1": {"type": ["string", "null"]}, diff --git a/airbyte-integrations/connectors/source-file/integration_tests/file_formats_test.py b/airbyte-integrations/connectors/source-file/integration_tests/file_formats_test.py index 8fc76afdb3bf..bdf0a835d2a5 100644 --- 
a/airbyte-integrations/connectors/source-file/integration_tests/file_formats_test.py +++ b/airbyte-integrations/connectors/source-file/integration_tests/file_formats_test.py @@ -4,7 +4,6 @@ from pathlib import Path -from unittest.mock import patch import pytest from airbyte_cdk import AirbyteLogger @@ -79,29 +78,3 @@ def run_load_nested_json_schema(config, expected_columns=10, expected_rows=42): df = data_list[0] assert len(df) == expected_rows # DataFrame should have 42 items return df - - -# https://github.com/airbytehq/alpha-beta-issues/issues/174 -# this is to ensure we make all conditions under which the bug is reproduced, i.e. -# - chunk size < file size -# - column type in the last chunk is not `string` -@patch("source_file.client.Client.CSV_CHUNK_SIZE", 1) -def test_csv_schema(): - source = SourceFile() - file_path = str(SAMPLE_DIRECTORY.parent.joinpath("discover.csv")) - config = {"dataset_name": "test", "format": "csv", "url": file_path, "provider": {"storage": "local"}} - catalog = source.discover(logger=AirbyteLogger(), config=config).dict() - assert len(catalog["streams"]) == 1 - schema = catalog["streams"][0]["json_schema"] - assert schema == { - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "Address": {"type": ["string", "null"]}, - "City": {"type": ["string", "null"]}, - "First Name": {"type": ["string", "null"]}, - "Last Name": {"type": ["string", "null"]}, - "State": {"type": ["string", "null"]}, - "zip_code": {"type": ["string", "null"]}, - }, - "type": "object", - } diff --git a/airbyte-integrations/connectors/source-file/source_file/client.py b/airbyte-integrations/connectors/source-file/source_file/client.py index f18f34a9a910..18e1d7977505 100644 --- a/airbyte-integrations/connectors/source-file/source_file/client.py +++ b/airbyte-integrations/connectors/source-file/source_file/client.py @@ -4,6 +4,7 @@ import json +import sys import tempfile import traceback import urllib @@ -288,11 +289,12 @@ def load_yaml(self, 
fp): if self._reader_format == "yaml": return pd.DataFrame(safe_load(fp)) - def load_dataframes(self, fp, skip_data=False) -> Iterable: + def load_dataframes(self, fp, skip_data=False, read_sample_chunk: bool = False) -> Iterable: """load and return the appropriate pandas dataframe. :param fp: file-like object to read from :param skip_data: limit reading data + :param read_sample_chunk: indicates whether a single chunk should only be read to generate schema :return: a list of dataframe loaded from files described in the configuration """ readers = { @@ -321,11 +323,16 @@ def load_dataframes(self, fp, skip_data=False) -> Iterable: reader_options = {**self._reader_options} try: if self._reader_format == "csv": + bytes_read = 0 reader_options["chunksize"] = self.CSV_CHUNK_SIZE if skip_data: reader_options["nrows"] = 0 reader_options["index_col"] = 0 - yield from reader(fp, **reader_options) + for record in reader(fp, **reader_options): + bytes_read += sys.getsizeof(record) + yield record + if read_sample_chunk and bytes_read >= self.CSV_CHUNK_SIZE: + return elif self._reader_options == "excel_binary": reader_options["engine"] = "pyxlsb" yield from reader(fp, **reader_options) @@ -393,13 +400,17 @@ def _cache_stream(self, fp): fp.close() return fp_tmp - def _stream_properties(self, fp): + def _stream_properties(self, fp, empty_schema: bool = False, read_sample_chunk: bool = False): + """ + empty_schema param is used to check connectivity, i.e. 
we only read a header and do not produce stream properties + read_sample_chunk is used to determine if just one chunk should be read to generate schema + """ if self._reader_format == "yaml": df_list = [self.load_yaml(fp)] else: if self.binary_source: fp = self._cache_stream(fp) - df_list = self.load_dataframes(fp, skip_data=False) + df_list = self.load_dataframes(fp, skip_data=empty_schema, read_sample_chunk=read_sample_chunk) fields = {} for df in df_list: for col in df.columns: @@ -408,8 +419,7 @@ def _stream_properties(self, fp): fields[col] = self.dtype_to_json_type(prev_frame_column_type, df[col].dtype) return {field: {"type": [fields[field], "null"]} for field in fields} - @property - def streams(self) -> Iterable: + def streams(self, empty_schema: bool = False) -> Iterable: """Discovers available streams""" # TODO handle discovery of directories of multiple files instead with self.reader.open() as fp: @@ -419,6 +429,6 @@ def streams(self) -> Iterable: json_schema = { "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", - "properties": self._stream_properties(fp), + "properties": self._stream_properties(fp, empty_schema=empty_schema, read_sample_chunk=True), } yield AirbyteStream(name=self.stream_name, json_schema=json_schema, supported_sync_modes=[SyncMode.full_refresh]) diff --git a/airbyte-integrations/connectors/source-file/source_file/source.py b/airbyte-integrations/connectors/source-file/source_file/source.py index 3bbfa790aeae..8032704de5b0 100644 --- a/airbyte-integrations/connectors/source-file/source_file/source.py +++ b/airbyte-integrations/connectors/source-file/source_file/source.py @@ -79,7 +79,8 @@ def _get_client(self, config: Mapping): return client - def _validate_and_transform(self, config: Mapping[str, Any]): + @staticmethod + def _validate_and_transform(config: Mapping[str, Any]): if "reader_options" in config: try: config["reader_options"] = json.loads(config["reader_options"]) @@ -108,9 +109,8 @@ def check(self, 
logger, config: Mapping) -> AirbyteConnectionStatus: client = self._get_client(config) source_url = client.reader.full_url try: - with client.reader.open(): - list(client.streams) - return AirbyteConnectionStatus(status=Status.SUCCEEDED) + list(client.streams(empty_schema=True)) + return AirbyteConnectionStatus(status=Status.SUCCEEDED) except (TypeError, ValueError, ConfigurationError) as err: reason = f"Failed to load {source_url}\n Please check File Format and Reader Options are set correctly. \n{repr(err)}" logger.error(reason) @@ -127,13 +127,13 @@ def discover(self, logger: AirbyteLogger, config: Mapping) -> AirbyteCatalog: """ config = self._validate_and_transform(config) client = self._get_client(config) - name = client.stream_name + name, full_url = client.stream_name, client.reader.full_url - logger.info(f"Discovering schema of {name} at {client.reader.full_url}...") + logger.info(f"Discovering schema of {name} at {full_url}...") try: - streams = list(client.streams) + streams = list(client.streams()) except Exception as err: - reason = f"Failed to discover schemas of {name} at {client.reader.full_url}: {repr(err)}\n{traceback.format_exc()}" + reason = f"Failed to discover schemas of {name} at {full_url}: {repr(err)}\n{traceback.format_exc()}" logger.error(reason) raise err return AirbyteCatalog(streams=streams) diff --git a/connectors.md b/connectors.md index 0dd979200259..bb2909a48d54 100644 --- a/connectors.md +++ b/connectors.md @@ -71,7 +71,7 @@ | **Facebook Pages** | Facebook Pages icon | Source | airbyte/source-facebook-pages:0.2.3 | beta | [link](https://docs.airbyte.com/integrations/sources/facebook-pages) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-facebook-pages) | `010eb12f-837b-4685-892d-0a39f76a98f5` | | **Fastbill** | Fastbill icon | Source | airbyte/source-fastbill:0.1.0 | alpha | [link](https://docs.airbyte.com/integrations/sources/fastbill) | 
[code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-fastbill) | `eb3e9c1c-0467-4eb7-a172-5265e04ccd0a` | | **Fauna** | Fauna icon | Source | airbyte/source-fauna:0.1.1 | alpha | [link](https://docs.airbyte.com/integrations/sources/fauna) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-fauna) | `3825db3e-c94b-42ac-bd53-b5a9507ace2b` | -| **File (CSV, JSON, Excel, Feather, Parquet)** | File (CSV, JSON, Excel, Feather, Parquet) icon | Source | airbyte/source-file:0.2.34 | generally_available | [link](https://docs.airbyte.com/integrations/sources/file) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-file) | `778daa7c-feaf-4db6-96f3-70fd645acc77` | +| **File (CSV, JSON, Excel, Feather, Parquet)** | File (CSV, JSON, Excel, Feather, Parquet) icon | Source | airbyte/source-file:0.2.35 | generally_available | [link](https://docs.airbyte.com/integrations/sources/file) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-file) | `778daa7c-feaf-4db6-96f3-70fd645acc77` | | **Firebase Realtime Database** | x | Source | airbyte/source-firebase-realtime-database:0.1.0 | alpha | [link](https://docs.airbyte.io/integrations/sources/firebase-realtime-database) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-firebase-realtime-database) | `acb5f973-a565-441e-992f-4946f3e65662` | | **Firebolt** | Firebolt icon | Source | airbyte/source-firebolt:0.2.0 | alpha | [link](https://docs.airbyte.com/integrations/sources/firebolt) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-firebolt) | `6f2ac653-8623-43c4-8950-19218c7caf3d` | | **Flexport** | x | Source | airbyte/source-flexport:0.1.0 | alpha | [link](https://docs.airbyte.com/integrations/sources/flexport) | 
[code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-flexport) | `f95337f1-2ad1-4baf-922f-2ca9152de630` | diff --git a/docs/integrations/sources/file.md b/docs/integrations/sources/file.md index 195466a7408f..16109d261279 100644 --- a/docs/integrations/sources/file.md +++ b/docs/integrations/sources/file.md @@ -152,43 +152,44 @@ In order to read large files from a remote location, this connector uses the [sm ## Changelog -| Version | Date | Pull Request | Subject | -|:--------|:-----------|:---------------------------------------------------------|:----------------------------------------------------------------------------| -| 0.2.34 | 2023-03-03 | [23723](https://github.com/airbytehq/airbyte/pull/23723) | Update description in spec, make user-friendly error messages and docs. | -| 0.2.33 | 2023-01-04 | [21012](https://github.com/airbytehq/airbyte/pull/21012) | Fix special characters bug | -| 0.2.32 | 2022-12-21 | [20740](https://github.com/airbytehq/airbyte/pull/20740) | Source File: increase SSH timeout to 60s | -| 0.2.31 | 2022-11-17 | [19567](https://github.com/airbytehq/airbyte/pull/19567) | Source File: bump 0.2.31 | -| 0.2.30 | 2022-11-10 | [19222](https://github.com/airbytehq/airbyte/pull/19222) | Use AirbyteConnectionStatus for "check" command | -| 0.2.29 | 2022-11-08 | [18587](https://github.com/airbytehq/airbyte/pull/18587) | Fix pandas read_csv header none issue. | -| 0.2.28 | 2022-10-27 | [18428](https://github.com/airbytehq/airbyte/pull/18428) | Added retry logic for `Connection reset error - 104` | -| 0.2.27 | 2022-10-26 | [18481](https://github.com/airbytehq/airbyte/pull/18481) | Fix check for wrong format | -| 0.2.26 | 2022-10-18 | [18116](https://github.com/airbytehq/airbyte/pull/18116) | Transform Dropbox shared link | -| 0.2.25 | 2022-10-14 | [17994](https://github.com/airbytehq/airbyte/pull/17994) | Handle `UnicodeDecodeError` during discover step. 
| -| 0.2.24 | 2022-10-03 | [17504](https://github.com/airbytehq/airbyte/pull/17504) | Validate data for `HTTPS` while `check_connection` | -| 0.2.23 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream state. | -| 0.2.22 | 2022-09-15 | [16772](https://github.com/airbytehq/airbyte/pull/16772) | Fix schema generation for JSON files containing arrays | -| 0.2.21 | 2022-08-26 | [15568](https://github.com/airbytehq/airbyte/pull/15568) | Specify `pyxlsb` library for Excel Binary Workbook files | -| 0.2.20 | 2022-08-23 | [15870](https://github.com/airbytehq/airbyte/pull/15870) | Fix CSV schema discovery | -| 0.2.19 | 2022-08-19 | [15768](https://github.com/airbytehq/airbyte/pull/15768) | Convert 'nan' to 'null' | -| 0.2.18 | 2022-08-16 | [15698](https://github.com/airbytehq/airbyte/pull/15698) | Cache binary stream to file for discover | -| 0.2.17 | 2022-08-11 | [15501](https://github.com/airbytehq/airbyte/pull/15501) | Cache binary stream to file | -| 0.2.16 | 2022-08-10 | [15293](https://github.com/airbytehq/airbyte/pull/15293) | added support for encoding reader option | -| 0.2.15 | 2022-08-05 | [15269](https://github.com/airbytehq/airbyte/pull/15269) | Bump `smart-open` version to 6.0.0 | -| 0.2.12 | 2022-07-12 | [14535](https://github.com/airbytehq/airbyte/pull/14535) | Fix invalid schema generation for JSON files | -| 0.2.11 | 2022-07-12 | [9974](https://github.com/airbytehq/airbyte/pull/14588) | Add support to YAML format | -| 0.2.9 | 2022-02-01 | [9974](https://github.com/airbytehq/airbyte/pull/9974) | Update airbyte-cdk 0.1.47 | -| 0.2.8 | 2021-12-06 | [8524](https://github.com/airbytehq/airbyte/pull/8524) | Update connector fields title/description | -| 0.2.7 | 2021-10-28 | [7387](https://github.com/airbytehq/airbyte/pull/7387) | Migrate source to CDK structure, add SAT testing. 
| -| 0.2.6 | 2021-08-26 | [5613](https://github.com/airbytehq/airbyte/pull/5613) | Add support to xlsb format | -| 0.2.5 | 2021-07-26 | [4953](https://github.com/airbytehq/airbyte/pull/4953) | Allow non-default port for SFTP type | -| 0.2.4 | 2021-06-09 | [3973](https://github.com/airbytehq/airbyte/pull/3973) | Add AIRBYTE_ENTRYPOINT for Kubernetes support | -| 0.2.3 | 2021-06-01 | [3771](https://github.com/airbytehq/airbyte/pull/3771) | Add Azure Storage Blob Files option | -| 0.2.2 | 2021-04-16 | [2883](https://github.com/airbytehq/airbyte/pull/2883) | Fix CSV discovery memory consumption | -| 0.2.1 | 2021-04-03 | [2726](https://github.com/airbytehq/airbyte/pull/2726) | Fix base connector versioning | -| 0.2.0 | 2021-03-09 | [2238](https://github.com/airbytehq/airbyte/pull/2238) | Protocol allows future/unknown properties | -| 0.1.10 | 2021-02-18 | [2118](https://github.com/airbytehq/airbyte/pull/2118) | Support JSONL format | -| 0.1.9 | 2021-02-02 | [1768](https://github.com/airbytehq/airbyte/pull/1768) | Add test cases for all formats | -| 0.1.8 | 2021-01-27 | [1738](https://github.com/airbytehq/airbyte/pull/1738) | Adopt connector best practices | -| 0.1.7 | 2020-12-16 | [1331](https://github.com/airbytehq/airbyte/pull/1331) | Refactor Python base connector | -| 0.1.6 | 2020-12-08 | [1249](https://github.com/airbytehq/airbyte/pull/1249) | Handle NaN values | -| 0.1.5 | 2020-11-30 | [1046](https://github.com/airbytehq/airbyte/pull/1046) | Add connectors using an index YAML file | +| Version | Date | Pull Request | Subject | +|:--------|:-----------|:---------------------------------------------------------|:--------------------------------------------------------------------------------------------------------| +| 0.2.35 | 2023-03-03 | [24278](https://github.com/airbytehq/airbyte/pull/24278) | Read only file header when checking connectivity; read only a single chunk when discovering the schema. 
| +| 0.2.34 | 2023-03-03 | [23723](https://github.com/airbytehq/airbyte/pull/23723) | Update description in spec, make user-friendly error messages and docs. | +| 0.2.33 | 2023-01-04 | [21012](https://github.com/airbytehq/airbyte/pull/21012) | Fix special characters bug | +| 0.2.32 | 2022-12-21 | [20740](https://github.com/airbytehq/airbyte/pull/20740) | Source File: increase SSH timeout to 60s | +| 0.2.31 | 2022-11-17 | [19567](https://github.com/airbytehq/airbyte/pull/19567) | Source File: bump 0.2.31 | +| 0.2.30 | 2022-11-10 | [19222](https://github.com/airbytehq/airbyte/pull/19222) | Use AirbyteConnectionStatus for "check" command | +| 0.2.29 | 2022-11-08 | [18587](https://github.com/airbytehq/airbyte/pull/18587) | Fix pandas read_csv header none issue. | +| 0.2.28 | 2022-10-27 | [18428](https://github.com/airbytehq/airbyte/pull/18428) | Added retry logic for `Connection reset error - 104` | +| 0.2.27 | 2022-10-26 | [18481](https://github.com/airbytehq/airbyte/pull/18481) | Fix check for wrong format | +| 0.2.26 | 2022-10-18 | [18116](https://github.com/airbytehq/airbyte/pull/18116) | Transform Dropbox shared link | +| 0.2.25 | 2022-10-14 | [17994](https://github.com/airbytehq/airbyte/pull/17994) | Handle `UnicodeDecodeError` during discover step. | +| 0.2.24 | 2022-10-03 | [17504](https://github.com/airbytehq/airbyte/pull/17504) | Validate data for `HTTPS` while `check_connection` | +| 0.2.23 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream state. 
| +| 0.2.22 | 2022-09-15 | [16772](https://github.com/airbytehq/airbyte/pull/16772) | Fix schema generation for JSON files containing arrays | +| 0.2.21 | 2022-08-26 | [15568](https://github.com/airbytehq/airbyte/pull/15568) | Specify `pyxlsb` library for Excel Binary Workbook files | +| 0.2.20 | 2022-08-23 | [15870](https://github.com/airbytehq/airbyte/pull/15870) | Fix CSV schema discovery | +| 0.2.19 | 2022-08-19 | [15768](https://github.com/airbytehq/airbyte/pull/15768) | Convert 'nan' to 'null' | +| 0.2.18 | 2022-08-16 | [15698](https://github.com/airbytehq/airbyte/pull/15698) | Cache binary stream to file for discover | +| 0.2.17 | 2022-08-11 | [15501](https://github.com/airbytehq/airbyte/pull/15501) | Cache binary stream to file | +| 0.2.16 | 2022-08-10 | [15293](https://github.com/airbytehq/airbyte/pull/15293) | Added support for encoding reader option | +| 0.2.15 | 2022-08-05 | [15269](https://github.com/airbytehq/airbyte/pull/15269) | Bump `smart-open` version to 6.0.0 | +| 0.2.12 | 2022-07-12 | [14535](https://github.com/airbytehq/airbyte/pull/14535) | Fix invalid schema generation for JSON files | +| 0.2.11 | 2022-07-12 | [14588](https://github.com/airbytehq/airbyte/pull/14588) | Add support for YAML format | +| 0.2.9 | 2022-02-01 | [9974](https://github.com/airbytehq/airbyte/pull/9974) | Update airbyte-cdk 0.1.47 | +| 0.2.8 | 2021-12-06 | [8524](https://github.com/airbytehq/airbyte/pull/8524) | Update connector fields title/description | +| 0.2.7 | 2021-10-28 | [7387](https://github.com/airbytehq/airbyte/pull/7387) | Migrate source to CDK structure, add SAT testing. 
| +| 0.2.6 | 2021-08-26 | [5613](https://github.com/airbytehq/airbyte/pull/5613) | Add support to xlsb format | +| 0.2.5 | 2021-07-26 | [4953](https://github.com/airbytehq/airbyte/pull/4953) | Allow non-default port for SFTP type | +| 0.2.4 | 2021-06-09 | [3973](https://github.com/airbytehq/airbyte/pull/3973) | Add AIRBYTE_ENTRYPOINT for Kubernetes support | +| 0.2.3 | 2021-06-01 | [3771](https://github.com/airbytehq/airbyte/pull/3771) | Add Azure Storage Blob Files option | +| 0.2.2 | 2021-04-16 | [2883](https://github.com/airbytehq/airbyte/pull/2883) | Fix CSV discovery memory consumption | +| 0.2.1 | 2021-04-03 | [2726](https://github.com/airbytehq/airbyte/pull/2726) | Fix base connector versioning | +| 0.2.0 | 2021-03-09 | [2238](https://github.com/airbytehq/airbyte/pull/2238) | Protocol allows future/unknown properties | +| 0.1.10 | 2021-02-18 | [2118](https://github.com/airbytehq/airbyte/pull/2118) | Support JSONL format | +| 0.1.9 | 2021-02-02 | [1768](https://github.com/airbytehq/airbyte/pull/1768) | Add test cases for all formats | +| 0.1.8 | 2021-01-27 | [1738](https://github.com/airbytehq/airbyte/pull/1738) | Adopt connector best practices | +| 0.1.7 | 2020-12-16 | [1331](https://github.com/airbytehq/airbyte/pull/1331) | Refactor Python base connector | +| 0.1.6 | 2020-12-08 | [1249](https://github.com/airbytehq/airbyte/pull/1249) | Handle NaN values | +| 0.1.5 | 2020-11-30 | [1046](https://github.com/airbytehq/airbyte/pull/1046) | Add connectors using an index YAML file |