From 3b59c10ee7f2f133702a17f5075cd9f4b263f025 Mon Sep 17 00:00:00 2001 From: Daryna Ishchenko <80129833+darynaishchenko@users.noreply.github.com> Date: Fri, 10 Mar 2023 11:13:05 +0200 Subject: [PATCH] Source Files: decrease give up rate (#23723) * updated spec, docs, error messages * added change log * updated error message in unit tests * added allowedHosts * refactored error message and url description * updated example for url field in spec * bump source-file version for source-file-secure * auto-bump connector version * updated spec.json for source-file-secure --------- Co-authored-by: Octavia Squidington III --- .../resources/seed/source_definitions.yaml | 5 +- .../src/main/resources/seed/source_specs.yaml | 9 +- .../connectors/source-file-secure/Dockerfile | 4 +- .../integration_tests/spec.json | 5 +- .../connectors/source-file/Dockerfile | 2 +- .../source-file/source_file/source.py | 9 +- .../source-file/source_file/spec.json | 5 +- .../source-file/unit_tests/test_source.py | 2 +- connectors.md | 2 +- docs/integrations/sources/file.md | 192 ++++++++++-------- 10 files changed, 129 insertions(+), 106 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index e008265e8f1a..18545cd4e5c7 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -595,11 +595,14 @@ - name: File (CSV, JSON, Excel, Feather, Parquet) sourceDefinitionId: 778daa7c-feaf-4db6-96f3-70fd645acc77 dockerRepository: airbyte/source-file - dockerImageTag: 0.2.33 + dockerImageTag: 0.2.34 documentationUrl: https://docs.airbyte.com/integrations/sources/file icon: file.svg sourceType: file releaseStage: generally_available + allowedHosts: + hosts: + - "*" - name: Freshcaller sourceDefinitionId: 8a5d48f6-03bb-4038-a942-a8d3f175cca3 dockerRepository: airbyte/source-freshcaller diff --git 
a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index e90afb42694b..cff20084964a 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -4317,7 +4317,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-file:0.2.33" +- dockerImage: "airbyte/source-file:0.2.34" spec: documentationUrl: "https://docs.airbyte.com/integrations/sources/file" connectionSpecification: @@ -4359,10 +4359,17 @@ examples: - "{}" - "{\"sep\": \" \"}" + - sep: "\t" + header: 0 + names: "column1, column2" url: type: "string" title: "URL" description: "The URL path to access the file which should be replicated." + examples: + - "https://storage.googleapis.com/covid19-open-data/v2/latest/epidemiology.csv" + - "gs://my-google-bucket/data.csv" + - "s3://gdelt-open-data/events/20190914.export.csv" provider: type: "object" title: "Storage Provider" diff --git a/airbyte-integrations/connectors/source-file-secure/Dockerfile b/airbyte-integrations/connectors/source-file-secure/Dockerfile index 9d4fcd94fb3e..54b2a4ea95dd 100644 --- a/airbyte-integrations/connectors/source-file-secure/Dockerfile +++ b/airbyte-integrations/connectors/source-file-secure/Dockerfile @@ -1,4 +1,4 @@ -FROM airbyte/source-file:0.2.31 +FROM airbyte/source-file:0.2.34 WORKDIR /airbyte/integration_code COPY source_file_secure ./source_file_secure @@ -9,5 +9,5 @@ RUN pip install . 
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.33 +LABEL io.airbyte.version=0.2.34 LABEL io.airbyte.name=airbyte/source-file-secure diff --git a/airbyte-integrations/connectors/source-file-secure/integration_tests/spec.json b/airbyte-integrations/connectors/source-file-secure/integration_tests/spec.json index 8b3932c079f8..3534ed9a384e 100644 --- a/airbyte-integrations/connectors/source-file-secure/integration_tests/spec.json +++ b/airbyte-integrations/connectors/source-file-secure/integration_tests/spec.json @@ -32,12 +32,13 @@ "type": "string", "title": "Reader Options", "description": "This should be a string in JSON format. It depends on the chosen file format to provide additional options and tune its behavior.", - "examples": ["{}", "{\"sep\": \" \"}"] + "examples": ["{}", "{\"sep\": \" \"}", { "sep" : "\t", "header" : 0, "names": "column1, column2"}] }, "url": { "type": "string", "title": "URL", - "description": "The URL path to access the file which should be replicated." 
+ "description": "The URL path to access the file which should be replicated.", + "examples": ["https://storage.googleapis.com/covid19-open-data/v2/latest/epidemiology.csv", "gs://my-google-bucket/data.csv", "s3://gdelt-open-data/events/20190914.export.csv"] }, "provider": { "type": "object", diff --git a/airbyte-integrations/connectors/source-file/Dockerfile b/airbyte-integrations/connectors/source-file/Dockerfile index 414ef8cd1979..bc428b63645f 100644 --- a/airbyte-integrations/connectors/source-file/Dockerfile +++ b/airbyte-integrations/connectors/source-file/Dockerfile @@ -17,5 +17,5 @@ COPY source_file ./source_file ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.33 +LABEL io.airbyte.version=0.2.34 LABEL io.airbyte.name=airbyte/source-file diff --git a/airbyte-integrations/connectors/source-file/source_file/source.py b/airbyte-integrations/connectors/source-file/source_file/source.py index cdcd66c197ba..3bbfa790aeae 100644 --- a/airbyte-integrations/connectors/source-file/source_file/source.py +++ b/airbyte-integrations/connectors/source-file/source_file/source.py @@ -84,7 +84,7 @@ def _validate_and_transform(self, config: Mapping[str, Any]): try: config["reader_options"] = json.loads(config["reader_options"]) except ValueError: - raise ConfigurationError("reader_options is not valid JSON") + raise ConfigurationError("Field 'reader_options' is not valid JSON. 
https://www.json.org/") else: config["reader_options"] = {} config["url"] = dropbox_force_download(config["url"]) @@ -112,14 +112,11 @@ def check(self, logger, config: Mapping) -> AirbyteConnectionStatus: list(client.streams) return AirbyteConnectionStatus(status=Status.SUCCEEDED) except (TypeError, ValueError, ConfigurationError) as err: - reason = ( - f"Failed to load {source_url}\n Please check File Format and Reader Options are set correctly" - f"\n{repr(err)}\n{traceback.format_exc()}" - ) + reason = f"Failed to load {source_url}\n Please check File Format and Reader Options are set correctly. \n{repr(err)}" logger.error(reason) return AirbyteConnectionStatus(status=Status.FAILED, message=reason) except Exception as err: - reason = f"Failed to load {source_url}: {repr(err)}\n{traceback.format_exc()}" + reason = f"Failed to load {source_url}. You could have provided an invalid URL, please verify it: {repr(err)}." logger.error(reason) return AirbyteConnectionStatus(status=Status.FAILED, message=reason) diff --git a/airbyte-integrations/connectors/source-file/source_file/spec.json b/airbyte-integrations/connectors/source-file/source_file/spec.json index d430448dc81b..ccc162b2692f 100644 --- a/airbyte-integrations/connectors/source-file/source_file/spec.json +++ b/airbyte-integrations/connectors/source-file/source_file/spec.json @@ -33,12 +33,13 @@ "type": "string", "title": "Reader Options", "description": "This should be a string in JSON format. It depends on the chosen file format to provide additional options and tune its behavior.", - "examples": ["{}", "{\"sep\": \" \"}"] + "examples": ["{}", "{\"sep\": \" \"}", { "sep" : "\t", "header" : 0, "names": "column1, column2"}] }, "url": { "type": "string", "title": "URL", - "description": "The URL path to access the file which should be replicated." 
+ "description": "The URL path to access the file which should be replicated.", + "examples": ["https://storage.googleapis.com/covid19-open-data/v2/latest/epidemiology.csv", "gs://my-google-bucket/data.csv", "s3://gdelt-open-data/events/20190914.export.csv"] }, "provider": { "type": "object", diff --git a/airbyte-integrations/connectors/source-file/unit_tests/test_source.py b/airbyte-integrations/connectors/source-file/unit_tests/test_source.py index 76f4c4dc5c30..f2365baa961d 100644 --- a/airbyte-integrations/connectors/source-file/unit_tests/test_source.py +++ b/airbyte-integrations/connectors/source-file/unit_tests/test_source.py @@ -150,7 +150,7 @@ def test_discover(source, config, client): def test_check_wrong_reader_options(source, config): config["reader_options"] = '{encoding":"utf_16"}' assert source.check(logger=logger, config=config) == AirbyteConnectionStatus( - status=Status.FAILED, message="reader_options is not valid JSON" + status=Status.FAILED, message="Field 'reader_options' is not valid JSON. 
https://www.json.org/" ) diff --git a/connectors.md b/connectors.md index 0dc092445e66..a1c7d99448bb 100644 --- a/connectors.md +++ b/connectors.md @@ -71,7 +71,7 @@ | **Facebook Pages** | Facebook Pages icon | Source | airbyte/source-facebook-pages:0.2.3 | beta | [link](https://docs.airbyte.com/integrations/sources/facebook-pages) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-facebook-pages) | `010eb12f-837b-4685-892d-0a39f76a98f5` | | **Fastbill** | Fastbill icon | Source | airbyte/source-fastbill:0.1.0 | alpha | [link](https://docs.airbyte.com/integrations/sources/fastbill) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-fastbill) | `eb3e9c1c-0467-4eb7-a172-5265e04ccd0a` | | **Fauna** | Fauna icon | Source | airbyte/source-fauna:0.1.1 | alpha | [link](https://docs.airbyte.com/integrations/sources/fauna) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-fauna) | `3825db3e-c94b-42ac-bd53-b5a9507ace2b` | -| **File (CSV, JSON, Excel, Feather, Parquet)** | File (CSV, JSON, Excel, Feather, Parquet) icon | Source | airbyte/source-file:0.2.33 | generally_available | [link](https://docs.airbyte.com/integrations/sources/file) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-file) | `778daa7c-feaf-4db6-96f3-70fd645acc77` | +| **File (CSV, JSON, Excel, Feather, Parquet)** | File (CSV, JSON, Excel, Feather, Parquet) icon | Source | airbyte/source-file:0.2.34 | generally_available | [link](https://docs.airbyte.com/integrations/sources/file) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-file) | `778daa7c-feaf-4db6-96f3-70fd645acc77` | | **Firebolt** | Firebolt icon | Source | airbyte/source-firebolt:0.2.0 | alpha | [link](https://docs.airbyte.com/integrations/sources/firebolt) | 
[code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-firebolt) | `6f2ac653-8623-43c4-8950-19218c7caf3d` | | **Flexport** | x | Source | airbyte/source-flexport:0.1.0 | alpha | [link](https://docs.airbyte.com/integrations/sources/flexport) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-flexport) | `f95337f1-2ad1-4baf-922f-2ca9152de630` | | **Freshcaller** | Freshcaller icon | Source | airbyte/source-freshcaller:0.1.0 | unknown | [link](https://docs.airbyte.com/integrations/sources/freshcaller) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-freshcaller) | `8a5d48f6-03bb-4038-a942-a8d3f175cca3` | diff --git a/docs/integrations/sources/file.md b/docs/integrations/sources/file.md index 8fc11bb91059..195466a7408f 100644 --- a/docs/integrations/sources/file.md +++ b/docs/integrations/sources/file.md @@ -1,57 +1,15 @@ # Files (CSV, JSON, Excel, Feather, Parquet) -## Features +This page contains the setup guide and reference information for the Files source connector. -| Feature | Supported? | -| ---------------------------------------- | ---------- | -| Full Refresh Sync | Yes | -| Incremental Sync | No | -| Replicate Incremental Deletes | No | -| Replicate Folders (multiple Files) | No | -| Replicate Glob Patterns (multiple Files) | No | +## Prerequisites +* URL to access the file +* Format +* Reader options +* Storage Providers -This source produces a single table for the target file as it replicates only one file at a time for the moment. Note that you should provide the `dataset_name` which dictates how the table will be identified in the destination (since `URL` can be made of complex characters). -### Storage Providers - -| Storage Providers | Supported? 
| -| ---------------------- | ----------------------------------------------- | -| HTTPS | Yes | -| Google Cloud Storage | Yes | -| Amazon Web Services S3 | Yes | -| SFTP | Yes | -| SSH / SCP | Yes | -| local filesystem | Local use only (inaccessible for Airbyte Cloud) | - -### File / Stream Compression - -| Compression | Supported? | -| ----------- | ---------- | -| Gzip | Yes | -| Zip | No | -| Bzip2 | No | -| Lzma | No | -| Xz | No | -| Snappy | No | - -### File Formats - -| Format | Supported? | -| --------------------- | ---------- | -| CSV | Yes | -| JSON | Yes | -| HTML | No | -| XML | No | -| Excel | Yes | -| Excel Binary Workbook | Yes | -| Feather | Yes | -| Parquet | Yes | -| Pickle | No | -| YAML | Yes | - -**This connector does not support syncing unstructured data files such as raw text, audio, or videos.** - -## Getting Started +## Setup guide @@ -99,7 +57,62 @@ For example, you can use the `{"orient" : "records"}` to change how orientation If you need to read Excel Binary Workbook, please specify `excel_binary` format in `File Format` select. -#### Changing data types of source columns + :::warning + This connector does not support syncing unstructured data files such as raw text, audio, or videos. + ::: + +## Supported sync modes + +| Feature | Supported? | +| ---------------------------------------- | ---------- | +| Full Refresh Sync | Yes | +| Incremental Sync | No | +| Replicate Incremental Deletes | No | +| Replicate Folders (multiple Files) | No | +| Replicate Glob Patterns (multiple Files) | No | + + :::info + This source produces a single table for the target file as it replicates only one file at a time for the moment. Note that you should provide the `dataset_name` which dictates how the table will be identified in the destination (since `URL` can be made of complex characters). + ::: + +## File / Stream Compression + +| Compression | Supported? 
| +| ----------- | ---------- | +| Gzip | Yes | +| Zip | No | +| Bzip2 | No | +| Lzma | No | +| Xz | No | +| Snappy | No | + +## Storage Providers + +| Storage Providers | Supported? | +| ---------------------- | ----------------------------------------------- | +| HTTPS | Yes | +| Google Cloud Storage | Yes | +| Amazon Web Services S3 | Yes | +| SFTP | Yes | +| SSH / SCP | Yes | +| local filesystem | Local use only (inaccessible for Airbyte Cloud) | + +### File Formats + +| Format | Supported? | +| --------------------- | ---------- | +| CSV | Yes | +| JSON | Yes | +| HTML | No | +| XML | No | +| Excel | Yes | +| Excel Binary Workbook | Yes | +| Feather | Yes | +| Parquet | Yes | +| Pickle | No | +| YAML | Yes | + +### Changing data types of source columns Normally, Airbyte tries to infer the data type from the source, but you can use `reader_options` to force specific data types. If you input `{"dtype":"string"}`, all columns will be forced to be parsed as strings. If you only want a specific column to be parsed as a string, simply use `{"dtype" : {"column name": "string"}}`. 
@@ -139,42 +152,43 @@ In order to read large files from a remote location, this connector uses the [sm ## Changelog -| Version | Date | Pull Request | Subject | -|:--------|:-----------|:---------------------------------------------------------|:---------------------------------------------------------| -| 0.2.33 | 2023-01-04 | [21012](https://github.com/airbytehq/airbyte/pull/21012) | Fix special characters bug | -| 0.2.32 | 2022-12-21 | [20740](https://github.com/airbytehq/airbyte/pull/20740) | Source File: increase SSH timeout to 60s | -| 0.2.31 | 2022-11-17 | [19567](https://github.com/airbytehq/airbyte/pull/19567) | Source File: bump 0.2.31 | -| 0.2.30 | 2022-11-10 | [19222](https://github.com/airbytehq/airbyte/pull/19222) | Use AirbyteConnectionStatus for "check" command | -| 0.2.29 | 2022-11-08 | [18587](https://github.com/airbytehq/airbyte/pull/18587) | Fix pandas read_csv header none issue. | -| 0.2.28 | 2022-10-27 | [18428](https://github.com/airbytehq/airbyte/pull/18428) | Added retry logic for `Connection reset error - 104` | -| 0.2.27 | 2022-10-26 | [18481](https://github.com/airbytehq/airbyte/pull/18481) | Fix check for wrong format | -| 0.2.26 | 2022-10-18 | [18116](https://github.com/airbytehq/airbyte/pull/18116) | Transform Dropbox shared link | -| 0.2.25 | 2022-10-14 | [17994](https://github.com/airbytehq/airbyte/pull/17994) | Handle `UnicodeDecodeError` during discover step. | -| 0.2.24 | 2022-10-03 | [17504](https://github.com/airbytehq/airbyte/pull/17504) | Validate data for `HTTPS` while `check_connection` | -| 0.2.23 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream state. 
| -| 0.2.22 | 2022-09-15 | [16772](https://github.com/airbytehq/airbyte/pull/16772) | Fix schema generation for JSON files containing arrays | -| 0.2.21 | 2022-08-26 | [15568](https://github.com/airbytehq/airbyte/pull/15568) | Specify `pyxlsb` library for Excel Binary Workbook files | -| 0.2.20 | 2022-08-23 | [15870](https://github.com/airbytehq/airbyte/pull/15870) | Fix CSV schema discovery | -| 0.2.19 | 2022-08-19 | [15768](https://github.com/airbytehq/airbyte/pull/15768) | Convert 'nan' to 'null' | -| 0.2.18 | 2022-08-16 | [15698](https://github.com/airbytehq/airbyte/pull/15698) | Cache binary stream to file for discover | -| 0.2.17 | 2022-08-11 | [15501](https://github.com/airbytehq/airbyte/pull/15501) | Cache binary stream to file | -| 0.2.16 | 2022-08-10 | [15293](https://github.com/airbytehq/airbyte/pull/15293) | added support for encoding reader option | -| 0.2.15 | 2022-08-05 | [15269](https://github.com/airbytehq/airbyte/pull/15269) | Bump `smart-open` version to 6.0.0 | -| 0.2.12 | 2022-07-12 | [14535](https://github.com/airbytehq/airbyte/pull/14535) | Fix invalid schema generation for JSON files | -| 0.2.11 | 2022-07-12 | [9974](https://github.com/airbytehq/airbyte/pull/14588) | Add support to YAML format | -| 0.2.9 | 2022-02-01 | [9974](https://github.com/airbytehq/airbyte/pull/9974) | Update airbyte-cdk 0.1.47 | -| 0.2.8 | 2021-12-06 | [8524](https://github.com/airbytehq/airbyte/pull/8524) | Update connector fields title/description | -| 0.2.7 | 2021-10-28 | [7387](https://github.com/airbytehq/airbyte/pull/7387) | Migrate source to CDK structure, add SAT testing. 
| -| 0.2.6 | 2021-08-26 | [5613](https://github.com/airbytehq/airbyte/pull/5613) | Add support to xlsb format | -| 0.2.5 | 2021-07-26 | [4953](https://github.com/airbytehq/airbyte/pull/4953) | Allow non-default port for SFTP type | -| 0.2.4 | 2021-06-09 | [3973](https://github.com/airbytehq/airbyte/pull/3973) | Add AIRBYTE_ENTRYPOINT for Kubernetes support | -| 0.2.3 | 2021-06-01 | [3771](https://github.com/airbytehq/airbyte/pull/3771) | Add Azure Storage Blob Files option | -| 0.2.2 | 2021-04-16 | [2883](https://github.com/airbytehq/airbyte/pull/2883) | Fix CSV discovery memory consumption | -| 0.2.1 | 2021-04-03 | [2726](https://github.com/airbytehq/airbyte/pull/2726) | Fix base connector versioning | -| 0.2.0 | 2021-03-09 | [2238](https://github.com/airbytehq/airbyte/pull/2238) | Protocol allows future/unknown properties | -| 0.1.10 | 2021-02-18 | [2118](https://github.com/airbytehq/airbyte/pull/2118) | Support JSONL format | -| 0.1.9 | 2021-02-02 | [1768](https://github.com/airbytehq/airbyte/pull/1768) | Add test cases for all formats | -| 0.1.8 | 2021-01-27 | [1738](https://github.com/airbytehq/airbyte/pull/1738) | Adopt connector best practices | -| 0.1.7 | 2020-12-16 | [1331](https://github.com/airbytehq/airbyte/pull/1331) | Refactor Python base connector | -| 0.1.6 | 2020-12-08 | [1249](https://github.com/airbytehq/airbyte/pull/1249) | Handle NaN values | -| 0.1.5 | 2020-11-30 | [1046](https://github.com/airbytehq/airbyte/pull/1046) | Add connectors using an index YAML file | +| Version | Date | Pull Request | Subject | +|:--------|:-----------|:---------------------------------------------------------|:----------------------------------------------------------------------------| +| 0.2.34 | 2023-03-03 | [23723](https://github.com/airbytehq/airbyte/pull/23723) | Update description in spec, make user-friendly error messages and docs. 
| +| 0.2.33 | 2023-01-04 | [21012](https://github.com/airbytehq/airbyte/pull/21012) | Fix special characters bug | +| 0.2.32 | 2022-12-21 | [20740](https://github.com/airbytehq/airbyte/pull/20740) | Source File: increase SSH timeout to 60s | +| 0.2.31 | 2022-11-17 | [19567](https://github.com/airbytehq/airbyte/pull/19567) | Source File: bump 0.2.31 | +| 0.2.30 | 2022-11-10 | [19222](https://github.com/airbytehq/airbyte/pull/19222) | Use AirbyteConnectionStatus for "check" command | +| 0.2.29 | 2022-11-08 | [18587](https://github.com/airbytehq/airbyte/pull/18587) | Fix pandas read_csv header none issue. | +| 0.2.28 | 2022-10-27 | [18428](https://github.com/airbytehq/airbyte/pull/18428) | Added retry logic for `Connection reset error - 104` | +| 0.2.27 | 2022-10-26 | [18481](https://github.com/airbytehq/airbyte/pull/18481) | Fix check for wrong format | +| 0.2.26 | 2022-10-18 | [18116](https://github.com/airbytehq/airbyte/pull/18116) | Transform Dropbox shared link | +| 0.2.25 | 2022-10-14 | [17994](https://github.com/airbytehq/airbyte/pull/17994) | Handle `UnicodeDecodeError` during discover step. | +| 0.2.24 | 2022-10-03 | [17504](https://github.com/airbytehq/airbyte/pull/17504) | Validate data for `HTTPS` while `check_connection` | +| 0.2.23 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream state. 
| +| 0.2.22 | 2022-09-15 | [16772](https://github.com/airbytehq/airbyte/pull/16772) | Fix schema generation for JSON files containing arrays | +| 0.2.21 | 2022-08-26 | [15568](https://github.com/airbytehq/airbyte/pull/15568) | Specify `pyxlsb` library for Excel Binary Workbook files | +| 0.2.20 | 2022-08-23 | [15870](https://github.com/airbytehq/airbyte/pull/15870) | Fix CSV schema discovery | +| 0.2.19 | 2022-08-19 | [15768](https://github.com/airbytehq/airbyte/pull/15768) | Convert 'nan' to 'null' | +| 0.2.18 | 2022-08-16 | [15698](https://github.com/airbytehq/airbyte/pull/15698) | Cache binary stream to file for discover | +| 0.2.17 | 2022-08-11 | [15501](https://github.com/airbytehq/airbyte/pull/15501) | Cache binary stream to file | +| 0.2.16 | 2022-08-10 | [15293](https://github.com/airbytehq/airbyte/pull/15293) | added support for encoding reader option | +| 0.2.15 | 2022-08-05 | [15269](https://github.com/airbytehq/airbyte/pull/15269) | Bump `smart-open` version to 6.0.0 | +| 0.2.12 | 2022-07-12 | [14535](https://github.com/airbytehq/airbyte/pull/14535) | Fix invalid schema generation for JSON files | +| 0.2.11 | 2022-07-12 | [14588](https://github.com/airbytehq/airbyte/pull/14588) | Add support to YAML format | +| 0.2.9 | 2022-02-01 | [9974](https://github.com/airbytehq/airbyte/pull/9974) | Update airbyte-cdk 0.1.47 | +| 0.2.8 | 2021-12-06 | [8524](https://github.com/airbytehq/airbyte/pull/8524) | Update connector fields title/description | +| 0.2.7 | 2021-10-28 | [7387](https://github.com/airbytehq/airbyte/pull/7387) | Migrate source to CDK structure, add SAT testing. 
| +| 0.2.6 | 2021-08-26 | [5613](https://github.com/airbytehq/airbyte/pull/5613) | Add support to xlsb format | +| 0.2.5 | 2021-07-26 | [4953](https://github.com/airbytehq/airbyte/pull/4953) | Allow non-default port for SFTP type | +| 0.2.4 | 2021-06-09 | [3973](https://github.com/airbytehq/airbyte/pull/3973) | Add AIRBYTE_ENTRYPOINT for Kubernetes support | +| 0.2.3 | 2021-06-01 | [3771](https://github.com/airbytehq/airbyte/pull/3771) | Add Azure Storage Blob Files option | +| 0.2.2 | 2021-04-16 | [2883](https://github.com/airbytehq/airbyte/pull/2883) | Fix CSV discovery memory consumption | +| 0.2.1 | 2021-04-03 | [2726](https://github.com/airbytehq/airbyte/pull/2726) | Fix base connector versioning | +| 0.2.0 | 2021-03-09 | [2238](https://github.com/airbytehq/airbyte/pull/2238) | Protocol allows future/unknown properties | +| 0.1.10 | 2021-02-18 | [2118](https://github.com/airbytehq/airbyte/pull/2118) | Support JSONL format | +| 0.1.9 | 2021-02-02 | [1768](https://github.com/airbytehq/airbyte/pull/1768) | Add test cases for all formats | +| 0.1.8 | 2021-01-27 | [1738](https://github.com/airbytehq/airbyte/pull/1738) | Adopt connector best practices | +| 0.1.7 | 2020-12-16 | [1331](https://github.com/airbytehq/airbyte/pull/1331) | Refactor Python base connector | +| 0.1.6 | 2020-12-08 | [1249](https://github.com/airbytehq/airbyte/pull/1249) | Handle NaN values | +| 0.1.5 | 2020-11-30 | [1046](https://github.com/airbytehq/airbyte/pull/1046) | Add connectors using an index YAML file |