Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Source Files: decrease give up rate #23723

Merged
merged 11 commits into from
Mar 10, 2023
Original file line number Diff line number Diff line change
Expand Up @@ -585,11 +585,14 @@
- name: File (CSV, JSON, Excel, Feather, Parquet)
sourceDefinitionId: 778daa7c-feaf-4db6-96f3-70fd645acc77
dockerRepository: airbyte/source-file
dockerImageTag: 0.2.33
dockerImageTag: 0.2.34
documentationUrl: https://docs.airbyte.com/integrations/sources/file
icon: file.svg
sourceType: file
releaseStage: generally_available
allowedHosts:
hosts:
- "*"
- name: Freshcaller
sourceDefinitionId: 8a5d48f6-03bb-4038-a942-a8d3f175cca3
dockerRepository: airbyte/source-freshcaller
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4306,7 +4306,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
- dockerImage: "airbyte/source-file:0.2.33"
- dockerImage: "airbyte/source-file:0.2.34"
spec:
documentationUrl: "https://docs.airbyte.com/integrations/sources/file"
connectionSpecification:
Expand Down Expand Up @@ -4348,10 +4348,17 @@
examples:
- "{}"
- "{\"sep\": \" \"}"
- sep: "\t"
header: 0
names: "column1, column2"
url:
type: "string"
title: "URL"
description: "The URL path to access the file which should be replicated."
examples:
- "https://storage.googleapis.com/covid19-open-data/v2/latest/epidemiology.csv"
- "gs://my-google-bucket/data.csv"
- "s3://gdelt-open-data/events/20190914.export.csv"
provider:
type: "object"
title: "Storage Provider"
Expand Down
4 changes: 2 additions & 2 deletions airbyte-integrations/connectors/source-file-secure/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM airbyte/source-file:0.2.31
FROM airbyte/source-file:0.2.34

WORKDIR /airbyte/integration_code
COPY source_file_secure ./source_file_secure
Expand All @@ -9,5 +9,5 @@ RUN pip install .
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.2.33
LABEL io.airbyte.version=0.2.34
LABEL io.airbyte.name=airbyte/source-file-secure
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,13 @@
"type": "string",
"title": "Reader Options",
"description": "This should be a string in JSON format. It depends on the chosen file format to provide additional options and tune its behavior.",
"examples": ["{}", "{\"sep\": \" \"}"]
"examples": ["{}", "{\"sep\": \" \"}", "{\"sep\": \"\\t\", \"header\": 0, \"names\": [\"column1\", \"column2\"]}"]
},
"url": {
"type": "string",
"title": "URL",
"description": "The URL path to access the file which should be replicated."
"description": "The URL path to access the file which should be replicated.",
"examples": ["https://storage.googleapis.com/covid19-open-data/v2/latest/epidemiology.csv", "gs://my-google-bucket/data.csv", "s3://gdelt-open-data/events/20190914.export.csv"]
},
"provider": {
"type": "object",
Expand Down
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-file/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,5 @@ COPY source_file ./source_file
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.2.33
LABEL io.airbyte.version=0.2.34
LABEL io.airbyte.name=airbyte/source-file
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def _validate_and_transform(self, config: Mapping[str, Any]):
try:
config["reader_options"] = json.loads(config["reader_options"])
except ValueError:
raise ConfigurationError("reader_options is not valid JSON")
raise ConfigurationError("Field 'reader_options' is not valid JSON. https://www.json.org/")
else:
config["reader_options"] = {}
config["url"] = dropbox_force_download(config["url"])
Expand Down Expand Up @@ -112,14 +112,11 @@ def check(self, logger, config: Mapping) -> AirbyteConnectionStatus:
list(client.streams)
return AirbyteConnectionStatus(status=Status.SUCCEEDED)
except (TypeError, ValueError, ConfigurationError) as err:
reason = (
f"Failed to load {source_url}\n Please check File Format and Reader Options are set correctly"
f"\n{repr(err)}\n{traceback.format_exc()}"
)
reason = f"Failed to load {source_url}\n Please check File Format and Reader Options are set correctly. \n{repr(err)}"
logger.error(reason)
return AirbyteConnectionStatus(status=Status.FAILED, message=reason)
except Exception as err:
reason = f"Failed to load {source_url}: {repr(err)}\n{traceback.format_exc()}"
reason = f"Failed to load {source_url}. You could have provided an invalid URL, please verify it: {repr(err)}."
logger.error(reason)
return AirbyteConnectionStatus(status=Status.FAILED, message=reason)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,13 @@
"type": "string",
"title": "Reader Options",
"description": "This should be a string in JSON format. It depends on the chosen file format to provide additional options and tune its behavior.",
"examples": ["{}", "{\"sep\": \" \"}"]
"examples": ["{}", "{\"sep\": \" \"}", "{\"sep\": \"\\t\", \"header\": 0, \"names\": [\"column1\", \"column2\"]}"]
},
"url": {
"type": "string",
"title": "URL",
"description": "The URL path to access the file which should be replicated."
"description": "The URL path to access the file which should be replicated.",
"examples": ["https://storage.googleapis.com/covid19-open-data/v2/latest/epidemiology.csv", "gs://my-google-bucket/data.csv", "s3://gdelt-open-data/events/20190914.export.csv"]
},
"provider": {
"type": "object",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def test_discover(source, config, client):
def test_check_wrong_reader_options(source, config):
config["reader_options"] = '{encoding":"utf_16"}'
assert source.check(logger=logger, config=config) == AirbyteConnectionStatus(
status=Status.FAILED, message="reader_options is not valid JSON"
status=Status.FAILED, message="Field 'reader_options' is not valid JSON. https://www.json.org/"
)


Expand Down
2 changes: 1 addition & 1 deletion connectors.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
| **Facebook Pages** | <img alt="Facebook Pages icon" src="https://raw.githubusercontent.com/airbytehq/airbyte/master/airbyte-config/init/src/main/resources/icons/facebook.svg" height="30" height="30"/> | Source | airbyte/source-facebook-pages:0.2.3 | beta | [link](https://docs.airbyte.com/integrations/sources/facebook-pages) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-facebook-pages) | <small>`010eb12f-837b-4685-892d-0a39f76a98f5`</small> |
| **Fastbill** | <img alt="Fastbill icon" src="https://raw.githubusercontent.com/airbytehq/airbyte/master/airbyte-config/init/src/main/resources/icons/fastbill.svg" height="30" height="30"/> | Source | airbyte/source-fastbill:0.1.0 | alpha | [link](https://docs.airbyte.com/integrations/sources/fastbill) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-fastbill) | <small>`eb3e9c1c-0467-4eb7-a172-5265e04ccd0a`</small> |
| **Fauna** | <img alt="Fauna icon" src="https://raw.githubusercontent.com/airbytehq/airbyte/master/airbyte-config/init/src/main/resources/icons/fauna.svg" height="30" height="30"/> | Source | airbyte/source-fauna:0.1.1 | alpha | [link](https://docs.airbyte.com/integrations/sources/fauna) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-fauna) | <small>`3825db3e-c94b-42ac-bd53-b5a9507ace2b`</small> |
| **File (CSV, JSON, Excel, Feather, Parquet)** | <img alt="File (CSV, JSON, Excel, Feather, Parquet) icon" src="https://raw.githubusercontent.com/airbytehq/airbyte/master/airbyte-config/init/src/main/resources/icons/file.svg" height="30" height="30"/> | Source | airbyte/source-file:0.2.33 | generally_available | [link](https://docs.airbyte.com/integrations/sources/file) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-file) | <small>`778daa7c-feaf-4db6-96f3-70fd645acc77`</small> |
| **File (CSV, JSON, Excel, Feather, Parquet)** | <img alt="File (CSV, JSON, Excel, Feather, Parquet) icon" src="https://raw.githubusercontent.com/airbytehq/airbyte/master/airbyte-config/init/src/main/resources/icons/file.svg" height="30" height="30"/> | Source | airbyte/source-file:0.2.34 | generally_available | [link](https://docs.airbyte.com/integrations/sources/file) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-file) | <small>`778daa7c-feaf-4db6-96f3-70fd645acc77`</small> |
| **Firebolt** | <img alt="Firebolt icon" src="https://raw.githubusercontent.com/airbytehq/airbyte/master/airbyte-config/init/src/main/resources/icons/firebolt.svg" height="30" height="30"/> | Source | airbyte/source-firebolt:0.2.0 | alpha | [link](https://docs.airbyte.com/integrations/sources/firebolt) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-firebolt) | <small>`6f2ac653-8623-43c4-8950-19218c7caf3d`</small> |
| **Flexport** | x | Source | airbyte/source-flexport:0.1.0 | alpha | [link](https://docs.airbyte.com/integrations/sources/flexport) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-flexport) | <small>`f95337f1-2ad1-4baf-922f-2ca9152de630`</small> |
| **Freshcaller** | <img alt="Freshcaller icon" src="https://raw.githubusercontent.com/airbytehq/airbyte/master/airbyte-config/init/src/main/resources/icons/freshcaller.svg" height="30" height="30"/> | Source | airbyte/source-freshcaller:0.1.0 | unknown | [link](https://docs.airbyte.com/integrations/sources/freshcaller) | [code](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-freshcaller) | <small>`8a5d48f6-03bb-4038-a942-a8d3f175cca3`</small> |
Expand Down
Loading