diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml
index 66ff8dc38a80..635578502f0b 100644
--- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml
+++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml
@@ -601,11 +601,14 @@
- name: File (CSV, JSON, Excel, Feather, Parquet)
sourceDefinitionId: 778daa7c-feaf-4db6-96f3-70fd645acc77
dockerRepository: airbyte/source-file
- dockerImageTag: 0.2.34
+ dockerImageTag: 0.2.35
documentationUrl: https://docs.airbyte.com/integrations/sources/file
icon: file.svg
sourceType: file
releaseStage: generally_available
+ allowedHosts:
+ hosts:
+ - "*"
- name: Firebase Realtime Database
sourceDefinitionId: acb5f973-a565-441e-992f-4946f3e65662
dockerRepository: airbyte/source-firebase-realtime-database
diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml
index 7c3ad932c17e..a867a4f709be 100644
--- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml
+++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml
@@ -4358,7 +4358,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
-- dockerImage: "airbyte/source-file:0.2.34"
+- dockerImage: "airbyte/source-file:0.2.35"
spec:
documentationUrl: "https://docs.airbyte.com/integrations/sources/file"
connectionSpecification:
diff --git a/airbyte-integrations/connectors/source-file-secure/Dockerfile b/airbyte-integrations/connectors/source-file-secure/Dockerfile
index 54b2a4ea95dd..c6606447d048 100644
--- a/airbyte-integrations/connectors/source-file-secure/Dockerfile
+++ b/airbyte-integrations/connectors/source-file-secure/Dockerfile
@@ -1,4 +1,4 @@
-FROM airbyte/source-file:0.2.34
+FROM airbyte/source-file:0.2.35
WORKDIR /airbyte/integration_code
COPY source_file_secure ./source_file_secure
@@ -9,5 +9,5 @@ RUN pip install .
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]
-LABEL io.airbyte.version=0.2.34
+LABEL io.airbyte.version=0.2.35
LABEL io.airbyte.name=airbyte/source-file-secure
diff --git a/airbyte-integrations/connectors/source-file/Dockerfile b/airbyte-integrations/connectors/source-file/Dockerfile
index bc428b63645f..b0fbaddd1662 100644
--- a/airbyte-integrations/connectors/source-file/Dockerfile
+++ b/airbyte-integrations/connectors/source-file/Dockerfile
@@ -17,5 +17,5 @@ COPY source_file ./source_file
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]
-LABEL io.airbyte.version=0.2.34
+LABEL io.airbyte.version=0.2.35
LABEL io.airbyte.name=airbyte/source-file
diff --git a/airbyte-integrations/connectors/source-file/integration_tests/client_storage_providers_test.py b/airbyte-integrations/connectors/source-file/integration_tests/client_storage_providers_test.py
index d980e953b12a..a1d70ec923b5 100644
--- a/airbyte-integrations/connectors/source-file/integration_tests/client_storage_providers_test.py
+++ b/airbyte-integrations/connectors/source-file/integration_tests/client_storage_providers_test.py
@@ -59,7 +59,7 @@ def test__read_file_not_found(provider_config, provider_name, file_path, file_fo
)
def test__streams_from_ssh_providers(provider_config, provider_name, file_path, file_format):
client = Client(dataset_name="output", format=file_format, url=file_path, provider=provider_config(provider_name))
- streams = list(client.streams)
+ streams = list(client.streams())
assert len(streams) == 1
assert streams[0].json_schema["properties"] == {
"header1": {"type": ["string", "null"]},
diff --git a/airbyte-integrations/connectors/source-file/integration_tests/file_formats_test.py b/airbyte-integrations/connectors/source-file/integration_tests/file_formats_test.py
index 8fc76afdb3bf..bdf0a835d2a5 100644
--- a/airbyte-integrations/connectors/source-file/integration_tests/file_formats_test.py
+++ b/airbyte-integrations/connectors/source-file/integration_tests/file_formats_test.py
@@ -4,7 +4,6 @@
from pathlib import Path
-from unittest.mock import patch
import pytest
from airbyte_cdk import AirbyteLogger
@@ -79,29 +78,3 @@ def run_load_nested_json_schema(config, expected_columns=10, expected_rows=42):
df = data_list[0]
assert len(df) == expected_rows # DataFrame should have 42 items
return df
-
-
-# https://github.com/airbytehq/alpha-beta-issues/issues/174
-# this is to ensure we make all conditions under which the bug is reproduced, i.e.
-# - chunk size < file size
-# - column type in the last chunk is not `string`
-@patch("source_file.client.Client.CSV_CHUNK_SIZE", 1)
-def test_csv_schema():
- source = SourceFile()
- file_path = str(SAMPLE_DIRECTORY.parent.joinpath("discover.csv"))
- config = {"dataset_name": "test", "format": "csv", "url": file_path, "provider": {"storage": "local"}}
- catalog = source.discover(logger=AirbyteLogger(), config=config).dict()
- assert len(catalog["streams"]) == 1
- schema = catalog["streams"][0]["json_schema"]
- assert schema == {
- "$schema": "http://json-schema.org/draft-07/schema#",
- "properties": {
- "Address": {"type": ["string", "null"]},
- "City": {"type": ["string", "null"]},
- "First Name": {"type": ["string", "null"]},
- "Last Name": {"type": ["string", "null"]},
- "State": {"type": ["string", "null"]},
- "zip_code": {"type": ["string", "null"]},
- },
- "type": "object",
- }
diff --git a/airbyte-integrations/connectors/source-file/source_file/client.py b/airbyte-integrations/connectors/source-file/source_file/client.py
index f18f34a9a910..18e1d7977505 100644
--- a/airbyte-integrations/connectors/source-file/source_file/client.py
+++ b/airbyte-integrations/connectors/source-file/source_file/client.py
@@ -4,6 +4,7 @@
import json
+import sys
import tempfile
import traceback
import urllib
@@ -288,11 +289,12 @@ def load_yaml(self, fp):
if self._reader_format == "yaml":
return pd.DataFrame(safe_load(fp))
- def load_dataframes(self, fp, skip_data=False) -> Iterable:
+ def load_dataframes(self, fp, skip_data=False, read_sample_chunk: bool = False) -> Iterable:
"""load and return the appropriate pandas dataframe.
:param fp: file-like object to read from
:param skip_data: limit reading data
+ :param read_sample_chunk: indicates whether only a single chunk should be read to generate the schema
:return: a list of dataframe loaded from files described in the configuration
"""
readers = {
@@ -321,11 +323,16 @@ def load_dataframes(self, fp, skip_data=False) -> Iterable:
reader_options = {**self._reader_options}
try:
if self._reader_format == "csv":
+ bytes_read = 0
reader_options["chunksize"] = self.CSV_CHUNK_SIZE
if skip_data:
reader_options["nrows"] = 0
reader_options["index_col"] = 0
- yield from reader(fp, **reader_options)
+ for record in reader(fp, **reader_options):
+ bytes_read += sys.getsizeof(record)
+ yield record
+ if read_sample_chunk and bytes_read >= self.CSV_CHUNK_SIZE:
+ return
elif self._reader_options == "excel_binary":
reader_options["engine"] = "pyxlsb"
yield from reader(fp, **reader_options)
@@ -393,13 +400,17 @@ def _cache_stream(self, fp):
fp.close()
return fp_tmp
- def _stream_properties(self, fp):
+ def _stream_properties(self, fp, empty_schema: bool = False, read_sample_chunk: bool = False):
+ """
+ empty_schema param is used to check connectivity, i.e. we only read a header and do not produce stream properties
+ read_sample_chunk is used to determine if just one chunk should be read to generate schema
+ """
if self._reader_format == "yaml":
df_list = [self.load_yaml(fp)]
else:
if self.binary_source:
fp = self._cache_stream(fp)
- df_list = self.load_dataframes(fp, skip_data=False)
+ df_list = self.load_dataframes(fp, skip_data=empty_schema, read_sample_chunk=read_sample_chunk)
fields = {}
for df in df_list:
for col in df.columns:
@@ -408,8 +419,7 @@ def _stream_properties(self, fp):
fields[col] = self.dtype_to_json_type(prev_frame_column_type, df[col].dtype)
return {field: {"type": [fields[field], "null"]} for field in fields}
- @property
- def streams(self) -> Iterable:
+ def streams(self, empty_schema: bool = False) -> Iterable:
"""Discovers available streams"""
# TODO handle discovery of directories of multiple files instead
with self.reader.open() as fp:
@@ -419,6 +429,6 @@ def streams(self) -> Iterable:
json_schema = {
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
- "properties": self._stream_properties(fp),
+ "properties": self._stream_properties(fp, empty_schema=empty_schema, read_sample_chunk=True),
}
yield AirbyteStream(name=self.stream_name, json_schema=json_schema, supported_sync_modes=[SyncMode.full_refresh])
diff --git a/airbyte-integrations/connectors/source-file/source_file/source.py b/airbyte-integrations/connectors/source-file/source_file/source.py
index 3bbfa790aeae..8032704de5b0 100644
--- a/airbyte-integrations/connectors/source-file/source_file/source.py
+++ b/airbyte-integrations/connectors/source-file/source_file/source.py
@@ -79,7 +79,8 @@ def _get_client(self, config: Mapping):
return client
- def _validate_and_transform(self, config: Mapping[str, Any]):
+ @staticmethod
+ def _validate_and_transform(config: Mapping[str, Any]):
if "reader_options" in config:
try:
config["reader_options"] = json.loads(config["reader_options"])
@@ -108,9 +109,8 @@ def check(self, logger, config: Mapping) -> AirbyteConnectionStatus:
client = self._get_client(config)
source_url = client.reader.full_url
try:
- with client.reader.open():
- list(client.streams)
- return AirbyteConnectionStatus(status=Status.SUCCEEDED)
+ list(client.streams(empty_schema=True))
+ return AirbyteConnectionStatus(status=Status.SUCCEEDED)
except (TypeError, ValueError, ConfigurationError) as err:
reason = f"Failed to load {source_url}\n Please check File Format and Reader Options are set correctly. \n{repr(err)}"
logger.error(reason)
@@ -127,13 +127,13 @@ def discover(self, logger: AirbyteLogger, config: Mapping) -> AirbyteCatalog:
"""
config = self._validate_and_transform(config)
client = self._get_client(config)
- name = client.stream_name
+ name, full_url = client.stream_name, client.reader.full_url
- logger.info(f"Discovering schema of {name} at {client.reader.full_url}...")
+ logger.info(f"Discovering schema of {name} at {full_url}...")
try:
- streams = list(client.streams)
+ streams = list(client.streams())
except Exception as err:
- reason = f"Failed to discover schemas of {name} at {client.reader.full_url}: {repr(err)}\n{traceback.format_exc()}"
+ reason = f"Failed to discover schemas of {name} at {full_url}: {repr(err)}\n{traceback.format_exc()}"
logger.error(reason)
raise err
return AirbyteCatalog(streams=streams)
diff --git a/connectors.md b/connectors.md
index 0dd979200259..bb2909a48d54 100644
--- a/connectors.md
+++ b/connectors.md
@@ -71,7 +71,7 @@
| **Facebook Pages** | | Source | airbyte/source-facebook-pages:0.2.3 | beta | [link](https://docs.airbyte.com/integrations/sources/facebook-pages) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-facebook-pages) | `010eb12f-837b-4685-892d-0a39f76a98f5` |
| **Fastbill** | | Source | airbyte/source-fastbill:0.1.0 | alpha | [link](https://docs.airbyte.com/integrations/sources/fastbill) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-fastbill) | `eb3e9c1c-0467-4eb7-a172-5265e04ccd0a` |
| **Fauna** | | Source | airbyte/source-fauna:0.1.1 | alpha | [link](https://docs.airbyte.com/integrations/sources/fauna) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-fauna) | `3825db3e-c94b-42ac-bd53-b5a9507ace2b` |
-| **File (CSV, JSON, Excel, Feather, Parquet)** | | Source | airbyte/source-file:0.2.34 | generally_available | [link](https://docs.airbyte.com/integrations/sources/file) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-file) | `778daa7c-feaf-4db6-96f3-70fd645acc77` |
+| **File (CSV, JSON, Excel, Feather, Parquet)** | | Source | airbyte/source-file:0.2.35 | generally_available | [link](https://docs.airbyte.com/integrations/sources/file) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-file) | `778daa7c-feaf-4db6-96f3-70fd645acc77` |
| **Firebase Realtime Database** | x | Source | airbyte/source-firebase-realtime-database:0.1.0 | alpha | [link](https://docs.airbyte.io/integrations/sources/firebase-realtime-database) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-firebase-realtime-database) | `acb5f973-a565-441e-992f-4946f3e65662` |
| **Firebolt** | | Source | airbyte/source-firebolt:0.2.0 | alpha | [link](https://docs.airbyte.com/integrations/sources/firebolt) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-firebolt) | `6f2ac653-8623-43c4-8950-19218c7caf3d` |
| **Flexport** | x | Source | airbyte/source-flexport:0.1.0 | alpha | [link](https://docs.airbyte.com/integrations/sources/flexport) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-flexport) | `f95337f1-2ad1-4baf-922f-2ca9152de630` |
diff --git a/docs/integrations/sources/file.md b/docs/integrations/sources/file.md
index 195466a7408f..16109d261279 100644
--- a/docs/integrations/sources/file.md
+++ b/docs/integrations/sources/file.md
@@ -152,43 +152,44 @@ In order to read large files from a remote location, this connector uses the [sm
## Changelog
-| Version | Date | Pull Request | Subject |
-|:--------|:-----------|:---------------------------------------------------------|:----------------------------------------------------------------------------|
-| 0.2.34 | 2023-03-03 | [23723](https://github.com/airbytehq/airbyte/pull/23723) | Update description in spec, make user-friendly error messages and docs. |
-| 0.2.33 | 2023-01-04 | [21012](https://github.com/airbytehq/airbyte/pull/21012) | Fix special characters bug |
-| 0.2.32 | 2022-12-21 | [20740](https://github.com/airbytehq/airbyte/pull/20740) | Source File: increase SSH timeout to 60s |
-| 0.2.31 | 2022-11-17 | [19567](https://github.com/airbytehq/airbyte/pull/19567) | Source File: bump 0.2.31 |
-| 0.2.30 | 2022-11-10 | [19222](https://github.com/airbytehq/airbyte/pull/19222) | Use AirbyteConnectionStatus for "check" command |
-| 0.2.29 | 2022-11-08 | [18587](https://github.com/airbytehq/airbyte/pull/18587) | Fix pandas read_csv header none issue. |
-| 0.2.28 | 2022-10-27 | [18428](https://github.com/airbytehq/airbyte/pull/18428) | Added retry logic for `Connection reset error - 104` |
-| 0.2.27 | 2022-10-26 | [18481](https://github.com/airbytehq/airbyte/pull/18481) | Fix check for wrong format |
-| 0.2.26 | 2022-10-18 | [18116](https://github.com/airbytehq/airbyte/pull/18116) | Transform Dropbox shared link |
-| 0.2.25 | 2022-10-14 | [17994](https://github.com/airbytehq/airbyte/pull/17994) | Handle `UnicodeDecodeError` during discover step. |
-| 0.2.24 | 2022-10-03 | [17504](https://github.com/airbytehq/airbyte/pull/17504) | Validate data for `HTTPS` while `check_connection` |
-| 0.2.23 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream state. |
-| 0.2.22 | 2022-09-15 | [16772](https://github.com/airbytehq/airbyte/pull/16772) | Fix schema generation for JSON files containing arrays |
-| 0.2.21 | 2022-08-26 | [15568](https://github.com/airbytehq/airbyte/pull/15568) | Specify `pyxlsb` library for Excel Binary Workbook files |
-| 0.2.20 | 2022-08-23 | [15870](https://github.com/airbytehq/airbyte/pull/15870) | Fix CSV schema discovery |
-| 0.2.19 | 2022-08-19 | [15768](https://github.com/airbytehq/airbyte/pull/15768) | Convert 'nan' to 'null' |
-| 0.2.18 | 2022-08-16 | [15698](https://github.com/airbytehq/airbyte/pull/15698) | Cache binary stream to file for discover |
-| 0.2.17 | 2022-08-11 | [15501](https://github.com/airbytehq/airbyte/pull/15501) | Cache binary stream to file |
-| 0.2.16 | 2022-08-10 | [15293](https://github.com/airbytehq/airbyte/pull/15293) | added support for encoding reader option |
-| 0.2.15 | 2022-08-05 | [15269](https://github.com/airbytehq/airbyte/pull/15269) | Bump `smart-open` version to 6.0.0 |
-| 0.2.12 | 2022-07-12 | [14535](https://github.com/airbytehq/airbyte/pull/14535) | Fix invalid schema generation for JSON files |
-| 0.2.11 | 2022-07-12 | [9974](https://github.com/airbytehq/airbyte/pull/14588) | Add support to YAML format |
-| 0.2.9 | 2022-02-01 | [9974](https://github.com/airbytehq/airbyte/pull/9974) | Update airbyte-cdk 0.1.47 |
-| 0.2.8 | 2021-12-06 | [8524](https://github.com/airbytehq/airbyte/pull/8524) | Update connector fields title/description |
-| 0.2.7 | 2021-10-28 | [7387](https://github.com/airbytehq/airbyte/pull/7387) | Migrate source to CDK structure, add SAT testing. |
-| 0.2.6 | 2021-08-26 | [5613](https://github.com/airbytehq/airbyte/pull/5613) | Add support to xlsb format |
-| 0.2.5 | 2021-07-26 | [4953](https://github.com/airbytehq/airbyte/pull/4953) | Allow non-default port for SFTP type |
-| 0.2.4 | 2021-06-09 | [3973](https://github.com/airbytehq/airbyte/pull/3973) | Add AIRBYTE_ENTRYPOINT for Kubernetes support |
-| 0.2.3 | 2021-06-01 | [3771](https://github.com/airbytehq/airbyte/pull/3771) | Add Azure Storage Blob Files option |
-| 0.2.2 | 2021-04-16 | [2883](https://github.com/airbytehq/airbyte/pull/2883) | Fix CSV discovery memory consumption |
-| 0.2.1 | 2021-04-03 | [2726](https://github.com/airbytehq/airbyte/pull/2726) | Fix base connector versioning |
-| 0.2.0 | 2021-03-09 | [2238](https://github.com/airbytehq/airbyte/pull/2238) | Protocol allows future/unknown properties |
-| 0.1.10 | 2021-02-18 | [2118](https://github.com/airbytehq/airbyte/pull/2118) | Support JSONL format |
-| 0.1.9 | 2021-02-02 | [1768](https://github.com/airbytehq/airbyte/pull/1768) | Add test cases for all formats |
-| 0.1.8 | 2021-01-27 | [1738](https://github.com/airbytehq/airbyte/pull/1738) | Adopt connector best practices |
-| 0.1.7 | 2020-12-16 | [1331](https://github.com/airbytehq/airbyte/pull/1331) | Refactor Python base connector |
-| 0.1.6 | 2020-12-08 | [1249](https://github.com/airbytehq/airbyte/pull/1249) | Handle NaN values |
-| 0.1.5 | 2020-11-30 | [1046](https://github.com/airbytehq/airbyte/pull/1046) | Add connectors using an index YAML file |
+| Version | Date | Pull Request | Subject |
+|:--------|:-----------|:---------------------------------------------------------|:--------------------------------------------------------------------------------------------------------|
+| 0.2.35 | 2023-03-03 | [24278](https://github.com/airbytehq/airbyte/pull/24278) | Read only file header when checking connectivity; read only a single chunk when discovering the schema. |
+| 0.2.34 | 2023-03-03 | [23723](https://github.com/airbytehq/airbyte/pull/23723) | Update description in spec, make user-friendly error messages and docs. |
+| 0.2.33 | 2023-01-04 | [21012](https://github.com/airbytehq/airbyte/pull/21012) | Fix special characters bug |
+| 0.2.32 | 2022-12-21 | [20740](https://github.com/airbytehq/airbyte/pull/20740) | Source File: increase SSH timeout to 60s |
+| 0.2.31 | 2022-11-17 | [19567](https://github.com/airbytehq/airbyte/pull/19567) | Source File: bump 0.2.31 |
+| 0.2.30 | 2022-11-10 | [19222](https://github.com/airbytehq/airbyte/pull/19222) | Use AirbyteConnectionStatus for "check" command |
+| 0.2.29 | 2022-11-08 | [18587](https://github.com/airbytehq/airbyte/pull/18587) | Fix pandas read_csv header none issue. |
+| 0.2.28 | 2022-10-27 | [18428](https://github.com/airbytehq/airbyte/pull/18428) | Added retry logic for `Connection reset error - 104` |
+| 0.2.27 | 2022-10-26 | [18481](https://github.com/airbytehq/airbyte/pull/18481) | Fix check for wrong format |
+| 0.2.26 | 2022-10-18 | [18116](https://github.com/airbytehq/airbyte/pull/18116) | Transform Dropbox shared link |
+| 0.2.25 | 2022-10-14 | [17994](https://github.com/airbytehq/airbyte/pull/17994) | Handle `UnicodeDecodeError` during discover step. |
+| 0.2.24 | 2022-10-03 | [17504](https://github.com/airbytehq/airbyte/pull/17504) | Validate data for `HTTPS` while `check_connection` |
+| 0.2.23 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream state. |
+| 0.2.22 | 2022-09-15 | [16772](https://github.com/airbytehq/airbyte/pull/16772) | Fix schema generation for JSON files containing arrays |
+| 0.2.21 | 2022-08-26 | [15568](https://github.com/airbytehq/airbyte/pull/15568) | Specify `pyxlsb` library for Excel Binary Workbook files |
+| 0.2.20 | 2022-08-23 | [15870](https://github.com/airbytehq/airbyte/pull/15870) | Fix CSV schema discovery |
+| 0.2.19 | 2022-08-19 | [15768](https://github.com/airbytehq/airbyte/pull/15768) | Convert 'nan' to 'null' |
+| 0.2.18 | 2022-08-16 | [15698](https://github.com/airbytehq/airbyte/pull/15698) | Cache binary stream to file for discover |
+| 0.2.17 | 2022-08-11 | [15501](https://github.com/airbytehq/airbyte/pull/15501) | Cache binary stream to file |
+| 0.2.16 | 2022-08-10 | [15293](https://github.com/airbytehq/airbyte/pull/15293) | added support for encoding reader option |
+| 0.2.15 | 2022-08-05 | [15269](https://github.com/airbytehq/airbyte/pull/15269) | Bump `smart-open` version to 6.0.0 |
+| 0.2.12 | 2022-07-12 | [14535](https://github.com/airbytehq/airbyte/pull/14535) | Fix invalid schema generation for JSON files |
+| 0.2.11 | 2022-07-12 | [14588](https://github.com/airbytehq/airbyte/pull/14588) | Add support to YAML format |
+| 0.2.9 | 2022-02-01 | [9974](https://github.com/airbytehq/airbyte/pull/9974) | Update airbyte-cdk 0.1.47 |
+| 0.2.8 | 2021-12-06 | [8524](https://github.com/airbytehq/airbyte/pull/8524) | Update connector fields title/description |
+| 0.2.7 | 2021-10-28 | [7387](https://github.com/airbytehq/airbyte/pull/7387) | Migrate source to CDK structure, add SAT testing. |
+| 0.2.6 | 2021-08-26 | [5613](https://github.com/airbytehq/airbyte/pull/5613) | Add support to xlsb format |
+| 0.2.5 | 2021-07-26 | [4953](https://github.com/airbytehq/airbyte/pull/4953) | Allow non-default port for SFTP type |
+| 0.2.4 | 2021-06-09 | [3973](https://github.com/airbytehq/airbyte/pull/3973) | Add AIRBYTE_ENTRYPOINT for Kubernetes support |
+| 0.2.3 | 2021-06-01 | [3771](https://github.com/airbytehq/airbyte/pull/3771) | Add Azure Storage Blob Files option |
+| 0.2.2 | 2021-04-16 | [2883](https://github.com/airbytehq/airbyte/pull/2883) | Fix CSV discovery memory consumption |
+| 0.2.1 | 2021-04-03 | [2726](https://github.com/airbytehq/airbyte/pull/2726) | Fix base connector versioning |
+| 0.2.0 | 2021-03-09 | [2238](https://github.com/airbytehq/airbyte/pull/2238) | Protocol allows future/unknown properties |
+| 0.1.10 | 2021-02-18 | [2118](https://github.com/airbytehq/airbyte/pull/2118) | Support JSONL format |
+| 0.1.9 | 2021-02-02 | [1768](https://github.com/airbytehq/airbyte/pull/1768) | Add test cases for all formats |
+| 0.1.8 | 2021-01-27 | [1738](https://github.com/airbytehq/airbyte/pull/1738) | Adopt connector best practices |
+| 0.1.7 | 2020-12-16 | [1331](https://github.com/airbytehq/airbyte/pull/1331) | Refactor Python base connector |
+| 0.1.6 | 2020-12-08 | [1249](https://github.com/airbytehq/airbyte/pull/1249) | Handle NaN values |
+| 0.1.5 | 2020-11-30 | [1046](https://github.com/airbytehq/airbyte/pull/1046) | Add connectors using an index YAML file |