From f829ac50adc67b46262276f74590607c6bb25445 Mon Sep 17 00:00:00 2001 From: Catherine Noll Date: Wed, 8 Mar 2023 22:23:11 +0000 Subject: [PATCH 01/71] New connector_builder module for handling requests from the Connector Builder. Also implements `resolve_manifest` handler --- .../python/connector_builder/README.md | 32 +++ .../python/connector_builder/__init__.py | 3 + .../connector_builder_source.py | 41 ++++ .../source_declarative_manifest/main.py | 44 +++- .../unit_tests/connector_builder/__init__.py | 3 + .../test_connector_builder_source.py | 205 ++++++++++++++++++ .../test_source_declarative_manifest.py | 128 +++++++---- 7 files changed, 404 insertions(+), 52 deletions(-) create mode 100644 airbyte-cdk/python/connector_builder/README.md create mode 100644 airbyte-cdk/python/connector_builder/__init__.py create mode 100644 airbyte-cdk/python/connector_builder/connector_builder_source.py create mode 100644 airbyte-cdk/python/unit_tests/connector_builder/__init__.py create mode 100644 airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_source.py diff --git a/airbyte-cdk/python/connector_builder/README.md b/airbyte-cdk/python/connector_builder/README.md new file mode 100644 index 000000000000..6c444267e52f --- /dev/null +++ b/airbyte-cdk/python/connector_builder/README.md @@ -0,0 +1,32 @@ +# Connector Builder Backend + +This is the backend for requests from the [Connector Builder](https://docs.airbyte.com/connector-development/config-based/connector-builder-ui/). + +## Local development + +### Locally running the Connector Builder backend + +``` +python main.py read --config secrets/config.json +``` + +Note: Requires the keys `__injected_declarative_manifest` and `__command` in its config, where `__injected_declarative_manifest` is a JSON manifest and `__command` is one of the commands handled by the ConnectorBuilderSource (`stream_read`, `list_streams`, or `resolve_manifest`). + +### Locally running the docker image + +#### Build + +First, make sure you build the latest Docker image: +``` +./gradlew airbyte-cdk:python:airbyteDocker +``` + +The docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in the Dockerfile. + +#### Run + +Then run any of the connector commands as follows: + +``` +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-declarative-manifest:dev read --config /secrets/config.json +``` diff --git a/airbyte-cdk/python/connector_builder/__init__.py b/airbyte-cdk/python/connector_builder/__init__.py new file mode 100644 index 000000000000..c941b3045795 --- /dev/null +++ b/airbyte-cdk/python/connector_builder/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-cdk/python/connector_builder/connector_builder_source.py b/airbyte-cdk/python/connector_builder/connector_builder_source.py new file mode 100644 index 000000000000..c530feb23a36 --- /dev/null +++ b/airbyte-cdk/python/connector_builder/connector_builder_source.py @@ -0,0 +1,41 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# +from datetime import datetime +from typing import Any, Mapping, Union + +from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage +from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource +from airbyte_cdk.utils.traced_exception import AirbyteTracedException + + +class ConnectorBuilderSource: + def __init__(self, source: ManifestDeclarativeSource): + self.source = source + + def list_streams(self) -> AirbyteRecordMessage: + raise NotImplementedError + + def stream_read(self, command_config) -> AirbyteRecordMessage: + raise NotImplementedError + + @staticmethod + def _emitted_at(): + return int(datetime.now().timestamp()) * 1000 + + def resolve_manifest(self) -> Union[AirbyteMessage, AirbyteRecordMessage]: + try: + return AirbyteRecordMessage( + data={"manifest": self.source.resolved_manifest}, + emitted_at=self._emitted_at(), + stream="", + ) + except Exception as exc: + error = AirbyteTracedException.from_exception(exc, message="Error resolving manifest.") + return error.as_airbyte_message() + + def handle_request(self, config: Mapping[str, Any]) -> Union[AirbyteMessage, AirbyteRecordMessage]: + command = config.get("__command") + if command == "resolve_manifest": + return self.resolve_manifest() + raise ValueError(f"Unrecognized command {command}.") diff --git a/airbyte-cdk/python/source_declarative_manifest/main.py b/airbyte-cdk/python/source_declarative_manifest/main.py index 2c1bdcb2b782..6cf9f08f1050 100644 --- a/airbyte-cdk/python/source_declarative_manifest/main.py +++ b/airbyte-cdk/python/source_declarative_manifest/main.py @@ -3,27 +3,55 @@ # +import argparse import sys -from typing import List +from typing import Any, List, Mapping, Tuple from airbyte_cdk.connector import BaseConnector from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch +from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource +from connector_builder.connector_builder_source import ConnectorBuilderSource -def create_manifest(args: List[str]): - parsed_args = AirbyteEntrypoint.parse_args(args) - if parsed_args.command == "spec": +def create_source(config: Mapping[str, Any]) -> DeclarativeSource: + manifest = config.get("__injected_declarative_manifest") + return ManifestDeclarativeSource(manifest) + + +def get_config_from_args(args: List[str]) -> Mapping[str, Any]: + command, config_filepath = preparse(args) + if command == "spec": raise ValueError("spec command is not supported for injected declarative manifest") - config = BaseConnector.read_config(parsed_args.config) + config = BaseConnector.read_config(config_filepath) + if "__injected_declarative_manifest" not in config: raise ValueError( f"Invalid config: `__injected_declarative_manifest` should be provided at the root of the config but config only has keys {list(config.keys())}" ) - return ManifestDeclarativeSource(config.get("__injected_declarative_manifest")) + + return config + + +def preparse(args: List[str]) -> Tuple[str, str]: + parser = argparse.ArgumentParser() + parser.add_argument("command", type=str, help="Airbyte Protocol command") + parser.add_argument("--config", type=str, required=True, help="path to the json configuration file") + parsed, _ = parser.parse_known_args(args) + return parsed.command, parsed.config + + +def handle_request(args: List[str]): + config = get_config_from_args(args) + source = create_source(config) + if "__command" in config: + ConnectorBuilderSource(source).handle_request(config) + else: + # Verify that the correct args are present for the production codepaths. + AirbyteEntrypoint.parse_args(args) + launch(source, sys.argv[1:]) if __name__ == "__main__": - source = create_manifest(sys.argv[1:]) - launch(source, sys.argv[1:]) + handle_request(sys.argv[1:]) diff --git a/airbyte-cdk/python/unit_tests/connector_builder/__init__.py b/airbyte-cdk/python/unit_tests/connector_builder/__init__.py new file mode 100644 index 000000000000..c941b3045795 --- /dev/null +++ b/airbyte-cdk/python/unit_tests/connector_builder/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_source.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_source.py new file mode 100644 index 000000000000..c9315e979b52 --- /dev/null +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_source.py @@ -0,0 +1,205 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# +import copy + +import pytest +from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource +from airbyte_cdk.sources.declarative.parsers.custom_exceptions import UndefinedReferenceException +from connector_builder.connector_builder_source import ConnectorBuilderSource + +_stream_name = "stream_with_custom_requester" +_stream_primary_key = "id" +_stream_url_base = "https://api.sendgrid.com" +_stream_options = {"name": _stream_name, "primary_key": _stream_primary_key, "url_base": _stream_url_base} + +MANIFEST = { + "version": "version", + "definitions": { + "schema_loader": {"name": "{{ options.stream_name }}", "file_path": "./source_sendgrid/schemas/{{ options.name }}.yaml"}, + "retriever": { + "paginator": { + "type": "DefaultPaginator", + "page_size": 10, + "page_size_option": {"inject_into": "request_parameter", "field_name": "page_size"}, + "page_token_option": {"inject_into": "path", "type": "RequestPath"}, + "pagination_strategy": {"type": "CursorPagination", "cursor_value": "{{ response._metadata.next }}"}, + }, + "requester": { + "path": "/v3/marketing/lists", + "authenticator": {"type": "BearerAuthenticator", "api_token": "{{ config.apikey }}"}, + "request_parameters": {"page_size": "10"}, + }, + "record_selector": {"extractor": {"field_path": ["result"]}}, + }, + }, + "streams": [ + { + "type": "DeclarativeStream", + "$parameters": _stream_options, + "schema_loader": {"$ref": "#/definitions/schema_loader"}, + "retriever": "#/definitions/retriever", + }, + ], + "check": {"type": "CheckStream", "stream_names": ["lists"]}, +} + + +CONFIG = { + "__injected_declarative_manifest": MANIFEST, + "__command": "resolve_manifest", +} + + +@pytest.mark.parametrize( + "command", + [ + pytest.param("asdf", id="test_arbitrary_command_error"), + pytest.param(None, id="test_command_is_none_error"), + pytest.param("", id="test_command_is_empty_error"), + ], +) +def test_invalid_command(command): + source = ConnectorBuilderSource(ManifestDeclarativeSource(MANIFEST)) + config = copy.deepcopy(CONFIG) + config["__command"] = command + with pytest.raises(ValueError): + source.handle_request(config) + + +def test_resolve_manifest(): + source = ConnectorBuilderSource(ManifestDeclarativeSource(MANIFEST)) + resolved_manifest = source.handle_request(CONFIG) + + expected_resolved_manifest = { + "type": "DeclarativeSource", + "version": "version", + "definitions": { + "schema_loader": {"name": "{{ options.stream_name }}", "file_path": "./source_sendgrid/schemas/{{ options.name }}.yaml"}, + "retriever": { + "paginator": { + "type": "DefaultPaginator", + "page_size": 10, + "page_size_option": {"inject_into": "request_parameter", "field_name": "page_size"}, + "page_token_option": {"inject_into": "path", "type": "RequestPath"}, + "pagination_strategy": {"type": "CursorPagination", "cursor_value": "{{ response._metadata.next }}"}, + }, + "requester": { + "path": "/v3/marketing/lists", + "authenticator": {"type": "BearerAuthenticator", "api_token": "{{ config.apikey }}"}, + "request_parameters": {"page_size": "10"}, + }, + "record_selector": {"extractor": {"field_path": ["result"]}}, + }, + }, + "streams": [ + { + "type": "DeclarativeStream", + "schema_loader": { + "type": "JsonFileSchemaLoader", + "name": "{{ options.stream_name }}", + "file_path": "./source_sendgrid/schemas/{{ options.name }}.yaml", + "primary_key": _stream_primary_key, + "url_base": _stream_url_base, + "$parameters": _stream_options, + }, + "retriever": { + "type": "SimpleRetriever", + "paginator": { + "type": "DefaultPaginator", + "page_size": 10, + "page_size_option": { + "type": "RequestOption", + "inject_into": "request_parameter", + "field_name": "page_size", + "name": _stream_name, + "primary_key": _stream_primary_key, + "url_base": _stream_url_base, + "$parameters": _stream_options, + }, + "page_token_option": { + "type": "RequestPath", + "inject_into": "path", + "name": _stream_name, + "primary_key": _stream_primary_key, + "url_base": _stream_url_base, + "$parameters": _stream_options, + }, + "pagination_strategy": { + "type": "CursorPagination", + "cursor_value": "{{ response._metadata.next }}", + "name": _stream_name, + "primary_key": _stream_primary_key, + "url_base": _stream_url_base, + "$parameters": _stream_options, + }, + "name": _stream_name, + "primary_key": _stream_primary_key, + "url_base": _stream_url_base, + "$parameters": _stream_options, + }, + "requester": { + "type": "HttpRequester", + "path": "/v3/marketing/lists", + "authenticator": { + "type": "BearerAuthenticator", + "api_token": "{{ config.apikey }}", + "name": _stream_name, + "primary_key": _stream_primary_key, + "url_base": _stream_url_base, + "$parameters": _stream_options, + }, + "request_parameters": {"page_size": "10"}, + "name": _stream_name, + "primary_key": _stream_primary_key, + "url_base": _stream_url_base, + "$parameters": _stream_options, + }, + "record_selector": { + "type": "RecordSelector", + "extractor": { + "type": "DpathExtractor", + "field_path": ["result"], + "name": _stream_name, + "primary_key": _stream_primary_key, + "url_base": _stream_url_base, + "$parameters": _stream_options, + }, + "name": _stream_name, + "primary_key": _stream_primary_key, + "url_base": _stream_url_base, + "$parameters": _stream_options, + }, + "name": _stream_name, + "primary_key": _stream_primary_key, + "url_base": _stream_url_base, + "$parameters": _stream_options, + }, + "name": _stream_name, + "primary_key": _stream_primary_key, + "url_base": _stream_url_base, + "$parameters": _stream_options, + }, + ], + "check": {"type": "CheckStream", "stream_names": ["lists"]}, + } + assert resolved_manifest.data["manifest"] == expected_resolved_manifest + + +def test_resolve_manifest_error_returns_error_response(): + class MockManifestDeclarativeSource: + @property + def resolved_manifest(self): + raise ValueError + + source = ConnectorBuilderSource(MockManifestDeclarativeSource()) + response = source.handle_request(CONFIG) + assert "Error resolving manifest" in response.trace.error.message + + +def test_resolve_manifest_cannot_instantiate_source(): + manifest = copy.deepcopy(MANIFEST) + manifest["streams"][0]["retriever"] = "#/definitions/retrieverasdf" + with pytest.raises(UndefinedReferenceException) as actual_exception: + ConnectorBuilderSource(ManifestDeclarativeSource(manifest)) + assert "Undefined reference #/definitions/retriever" in actual_exception.value.args[0] diff --git a/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py b/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py index 63757dda03f5..e40374cfc1ac 100644 --- a/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py +++ b/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py @@ -4,44 +4,22 @@ import copy import json +from unittest import mock import pytest +import source_declarative_manifest from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource -from source_declarative_manifest.main import create_manifest +from source_declarative_manifest.main import handle_request CONFIG = { "__injected_declarative_manifest": { "version": "0.1.0", "definitions": { - "selector": { - "extractor": { - "field_path": [] - } - }, - "requester": { - "url_base": "https://test.com/api", - "http_method": "GET" - }, - "retriever": { - "record_selector": { - "$ref": "#/definitions/selector" - }, - "requester": { - "$ref": "#/definitions/requester" - } - }, - "base_stream": { - "retriever": { - "$ref": "#/definitions/retriever" - } - }, - "data_stream": { - "$ref": "#/definitions/base_stream", - "$parameters": { - "name": "data", - "path": "/data" - } - }, + "selector": {"extractor": {"field_path": []}}, + "requester": {"url_base": "https://test.com/api", "http_method": "GET"}, + "retriever": {"record_selector": {"$ref": "#/definitions/selector"}, "requester": {"$ref": "#/definitions/requester"}}, + "base_stream": {"retriever": {"$ref": "#/definitions/retriever"}}, + "data_stream": {"$ref": "#/definitions/base_stream", "$parameters": {"name": "data", "path": "/data"}}, }, "streams": [ "#/definitions/data_stream", @@ -59,40 +37,102 @@ "title": "Test Spec", "type": "object", "additionalProperties": True, - "properties": {} - } - } + "properties": {}, + }, + }, } } +CATALOG = {} + @pytest.fixture def valid_config_file(tmp_path): - config_file = tmp_path / "config.json" - config_file.write_text(json.dumps(CONFIG)) - return config_file + return _write_to_tmp_path(tmp_path, CONFIG, "config") + + +@pytest.fixture +def catalog_file(tmp_path): + return _write_to_tmp_path(tmp_path, CATALOG, "catalog") @pytest.fixture def config_file_without_injection(tmp_path): config = copy.deepcopy(CONFIG) del config["__injected_declarative_manifest"] + return _write_to_tmp_path(tmp_path, config, "config") - config_file = tmp_path / "config.json" + +@pytest.fixture +def config_file_with_command(tmp_path): + config = copy.deepcopy(CONFIG) + config["__command"] = "command" + return _write_to_tmp_path(tmp_path, config, "config") + + +def _write_to_tmp_path(tmp_path, config, filename): + config_file = tmp_path / f"{filename}.json" config_file.write_text(json.dumps(config)) return config_file -def test_on_spec_command_then_raise_value_error(): +def test_on_spec_command_then_raise_value_error(valid_config_file): with pytest.raises(ValueError): - create_manifest(["spec"]) + handle_request(["spec", "--config", str(valid_config_file)]) -def test_given_no_injected_declarative_manifest_then_raise_value_error(config_file_without_injection): +@pytest.mark.parametrize( + "command", + [ + pytest.param("check", id="test_check_command_error"), + pytest.param("discover", id="test_discover_command_error"), + pytest.param("read", id="test_read_command_error"), + pytest.param("asdf", id="test_arbitrary_command_error"), + ], +) +def test_given_no_injected_declarative_manifest_then_raise_value_error(command, config_file_without_injection): with pytest.raises(ValueError): - create_manifest(["check", "--config", str(config_file_without_injection)]) + handle_request([command, "--config", str(config_file_without_injection)]) + + +@pytest.mark.parametrize( + "command", + [ + pytest.param("check", id="test_check_command_error"), + pytest.param("discover", id="test_discover_command_error"), + pytest.param("read", id="test_read_command_error"), + pytest.param("asdf", id="test_arbitrary_command_error"), + ], +) +def test_missing_config_raises_value_error(command): + with pytest.raises(SystemExit): + handle_request([command]) + + +@pytest.mark.parametrize( + "command", + [ + pytest.param("check", id="test_check_command"), + pytest.param("discover", id="test_discover_command"), + pytest.param("read", id="test_read_command"), + ], +) +def test_given_injected_declarative_manifest_then_launch_with_declarative_manifest(command, valid_config_file, catalog_file): + with mock.patch("source_declarative_manifest.main.launch") as patch: + if command == "read": + handle_request([command, "--config", str(valid_config_file), "--catalog", str(catalog_file)]) + else: + handle_request([command, "--config", str(valid_config_file)]) + source, _ = patch.call_args[0] + assert isinstance(source, ManifestDeclarativeSource) + + +def test_given_injected_declarative_manifest_then_launch_with_declarative_manifest_missing_arg(valid_config_file): + with pytest.raises(SystemExit): + handle_request(["read", "--config", str(valid_config_file)]) -def test_given_injected_declarative_manifest_then_return_declarative_manifest(valid_config_file): - source = create_manifest(["check", "--config", str(valid_config_file)]) - assert isinstance(source, ManifestDeclarativeSource) +def test_given_command_then_use_connector_builder_source(config_file_with_command): + with mock.patch.object(source_declarative_manifest.main.ConnectorBuilderSource, "handle_request") as patch: + handle_request(["read", "--config", str(config_file_with_command)]) + assert patch.call_count == 1 From 13a9a14fa3f4e7e3c196157d50b5e51737a3c2b7 Mon Sep 17 00:00:00 2001 From: clnoll Date: Wed, 8 Mar 2023 22:39:44 +0000 Subject: [PATCH 02/71] Automated Commit - Formatting Changes --- airbyte-cdk/python/connector_builder/connector_builder_source.py | 1 + .../connector_builder/test_connector_builder_source.py | 1 + 2 files changed, 2 insertions(+) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_source.py b/airbyte-cdk/python/connector_builder/connector_builder_source.py index c530feb23a36..8ca0bcc0492d 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_source.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_source.py @@ -1,6 +1,7 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # + from datetime import datetime from typing import Any, Mapping, Union diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_source.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_source.py index c9315e979b52..dd84547ca439 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_source.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_source.py @@ -1,6 +1,7 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # + import copy import pytest From 1c85330f17bdb24e472c74b664c6aafd9f5c1b46 Mon Sep 17 00:00:00 2001 From: Catherine Noll Date: Thu, 9 Mar 2023 12:25:18 +0000 Subject: [PATCH 03/71] Rename ConnectorBuilderSource to ConnectorBuilderHandler --- airbyte-cdk/python/connector_builder/README.md | 2 +- ...er_source.py => connector_builder_handler.py} | 2 +- .../python/source_declarative_manifest/main.py | 4 ++-- ...urce.py => test_connector_builder_handler.py} | 16 ++++++++-------- .../test_source_declarative_manifest.py | 4 ++-- 5 files changed, 14 insertions(+), 14 deletions(-) rename airbyte-cdk/python/connector_builder/{connector_builder_source.py => connector_builder_handler.py} (97%) rename airbyte-cdk/python/unit_tests/connector_builder/{test_connector_builder_source.py => test_connector_builder_handler.py} (94%) diff --git a/airbyte-cdk/python/connector_builder/README.md b/airbyte-cdk/python/connector_builder/README.md index 6c444267e52f..ac2db315bc3e 100644 --- a/airbyte-cdk/python/connector_builder/README.md +++ b/airbyte-cdk/python/connector_builder/README.md @@ -10,7 +10,7 @@ This is the backend for requests from the [Connector Builder](https://docs.airby python main.py read --config secrets/config.json ``` -Note: Requires the keys `__injected_declarative_manifest` and `__command` in its config, where `__injected_declarative_manifest` is a JSON manifest and `__command` is one of the commands handled by the ConnectorBuilderSource (`stream_read`, `list_streams`, or `resolve_manifest`). +Note: Requires the keys `__injected_declarative_manifest` and `__command` in its config, where `__injected_declarative_manifest` is a JSON manifest and `__command` is one of the commands handled by the ConnectorBuilderHandler (`stream_read`, `list_streams`, or `resolve_manifest`). ### Locally running the docker image diff --git a/airbyte-cdk/python/connector_builder/connector_builder_source.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py similarity index 97% rename from airbyte-cdk/python/connector_builder/connector_builder_source.py rename to airbyte-cdk/python/connector_builder/connector_builder_handler.py index 8ca0bcc0492d..4a2f5953b3eb 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_source.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -10,7 +10,7 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException -class ConnectorBuilderSource: +class ConnectorBuilderHandler: def __init__(self, source: ManifestDeclarativeSource): self.source = source diff --git a/airbyte-cdk/python/source_declarative_manifest/main.py b/airbyte-cdk/python/source_declarative_manifest/main.py index 6cf9f08f1050..890fc37931a5 100644 --- a/airbyte-cdk/python/source_declarative_manifest/main.py +++ b/airbyte-cdk/python/source_declarative_manifest/main.py @@ -11,7 +11,7 @@ from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource -from connector_builder.connector_builder_source import ConnectorBuilderSource +from connector_builder.connector_builder_handler import ConnectorBuilderHandler def create_source(config: Mapping[str, Any]) -> DeclarativeSource: @@ -46,7 +46,7 @@ def handle_request(args: List[str]): config = get_config_from_args(args) source = create_source(config) if "__command" in config: - ConnectorBuilderSource(source).handle_request(config) + ConnectorBuilderHandler(source).handle_request(config) else: # Verify that the correct args are present for the production codepaths. AirbyteEntrypoint.parse_args(args) diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_source.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py similarity index 94% rename from airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_source.py rename to airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py index dd84547ca439..e2713d6068c5 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_source.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py @@ -7,7 +7,7 @@ import pytest from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from airbyte_cdk.sources.declarative.parsers.custom_exceptions import UndefinedReferenceException -from connector_builder.connector_builder_source import ConnectorBuilderSource +from connector_builder.connector_builder_handler import ConnectorBuilderHandler _stream_name = "stream_with_custom_requester" _stream_primary_key = "id" @@ -61,16 +61,16 @@ ], ) def test_invalid_command(command): - source = ConnectorBuilderSource(ManifestDeclarativeSource(MANIFEST)) + handler = ConnectorBuilderHandler(ManifestDeclarativeSource(MANIFEST)) config = copy.deepcopy(CONFIG) config["__command"] = command with pytest.raises(ValueError): - source.handle_request(config) + handler.handle_request(config) def test_resolve_manifest(): - source = ConnectorBuilderSource(ManifestDeclarativeSource(MANIFEST)) - resolved_manifest = source.handle_request(CONFIG) + handler = ConnectorBuilderHandler(ManifestDeclarativeSource(MANIFEST)) + resolved_manifest = handler.handle_request(CONFIG) expected_resolved_manifest = { "type": "DeclarativeSource", @@ -193,8 +193,8 @@ class MockManifestDeclarativeSource: def resolved_manifest(self): raise ValueError - source = ConnectorBuilderSource(MockManifestDeclarativeSource()) - response = source.handle_request(CONFIG) + handler = ConnectorBuilderHandler(MockManifestDeclarativeSource()) + response = handler.handle_request(CONFIG) assert "Error resolving manifest" in response.trace.error.message @@ -202,5 +202,5 @@ def test_resolve_manifest_cannot_instantiate_source(): manifest = copy.deepcopy(MANIFEST) manifest["streams"][0]["retriever"] = "#/definitions/retrieverasdf" with pytest.raises(UndefinedReferenceException) as actual_exception: - ConnectorBuilderSource(ManifestDeclarativeSource(manifest)) + ConnectorBuilderHandler(ManifestDeclarativeSource(manifest)) assert "Undefined reference #/definitions/retriever" in actual_exception.value.args[0] diff --git a/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py b/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py index e40374cfc1ac..788495dd04fb 100644 --- a/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py +++ b/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py @@ -132,7 +132,7 @@ def test_given_injected_declarative_manifest_then_launch_with_declarative_manife handle_request(["read", "--config", str(valid_config_file)]) -def test_given_command_then_use_connector_builder_source(config_file_with_command): - with mock.patch.object(source_declarative_manifest.main.ConnectorBuilderSource, "handle_request") as patch: +def test_given_command_then_use_connector_builder_handler(config_file_with_command): + with mock.patch.object(source_declarative_manifest.main.ConnectorBuilderHandler, "handle_request") as patch: handle_request(["read", "--config", str(config_file_with_command)]) assert patch.call_count == 1 From 0dccb4ee018ec0d6d6a32acf2bf8e6b1087b479e Mon Sep 17 00:00:00 2001 From: Catherine Noll Date: Thu, 9 Mar 2023 12:34:55 +0000 Subject: [PATCH 04/71] Update source_declarative_manifest README --- airbyte-cdk/python/source_declarative_manifest/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/airbyte-cdk/python/source_declarative_manifest/README.md b/airbyte-cdk/python/source_declarative_manifest/README.md index 7a723a4b6d33..665e03e8fda1 100644 --- a/airbyte-cdk/python/source_declarative_manifest/README.md +++ b/airbyte-cdk/python/source_declarative_manifest/README.md @@ -7,6 +7,8 @@ This entrypoint is used for connectors created by the connector builder. These c The spec operation is not supported because the config is not known when running a spec. +This entrypoint is also the entrypoint for requests from the [Connector Builder](https://docs.airbyte.com/connector-development/config-based/connector-builder-ui/) Server. In addition to the `__injected_declarative_manifest`, the Connector Builder backend config requires the `__command` key, whose value is one of the commands handled by the ConnectorBuilderHandler (`stream_read`, `list_streams`, or `resolve_manifest`). + ## Local development #### Building From f7a475ab03bc4b12034c7432032662d20ecde37c Mon Sep 17 00:00:00 2001 From: Catherine Noll Date: Thu, 9 Mar 2023 16:19:29 +0000 Subject: [PATCH 05/71] Reorganize --- .../connector_builder_handler.py | 55 ++++++++----------- .../source_declarative_manifest/main.py | 21 +++++-- .../test_connector_builder_handler.py | 38 ++----------- .../test_source_declarative_manifest.py | 20 ++++++- 4 files changed, 62 insertions(+), 72 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index 4a2f5953b3eb..a1d8f6a6f647 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -3,40 +3,31 @@ # from datetime import datetime -from typing import Any, Mapping, Union +from typing import Union from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage -from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from airbyte_cdk.utils.traced_exception import AirbyteTracedException -class ConnectorBuilderHandler: - def __init__(self, source: ManifestDeclarativeSource): - self.source = source - - def list_streams(self) -> AirbyteRecordMessage: - raise NotImplementedError - - def stream_read(self, command_config) -> AirbyteRecordMessage: - raise NotImplementedError - - @staticmethod - def _emitted_at(): - return int(datetime.now().timestamp()) * 1000 - - def resolve_manifest(self) -> Union[AirbyteMessage, AirbyteRecordMessage]: - try: - return AirbyteRecordMessage( - data={"manifest": self.source.resolved_manifest}, - emitted_at=self._emitted_at(), - stream="", - ) - except Exception as exc: - error = AirbyteTracedException.from_exception(exc, message="Error resolving manifest.") - return error.as_airbyte_message() - - def handle_request(self, config: Mapping[str, Any]) -> Union[AirbyteMessage, AirbyteRecordMessage]: - command = config.get("__command") - if command == "resolve_manifest": - return self.resolve_manifest() - raise ValueError(f"Unrecognized command {command}.") +def list_streams() -> AirbyteRecordMessage: + raise NotImplementedError + + +def stream_read() -> AirbyteRecordMessage: + raise NotImplementedError + + +def resolve_manifest(source) -> Union[AirbyteMessage, AirbyteRecordMessage]: + try: + return AirbyteRecordMessage( + data={"manifest": source.resolved_manifest}, + emitted_at=_emitted_at(), + stream="", + ) + except Exception as exc: + error = AirbyteTracedException.from_exception(exc, message="Error resolving manifest.") + return error.as_airbyte_message() + + +def _emitted_at(): + return int(datetime.now().timestamp()) * 1000 diff --git a/airbyte-cdk/python/source_declarative_manifest/main.py b/airbyte-cdk/python/source_declarative_manifest/main.py index 890fc37931a5..2d66c822477d 100644 --- a/airbyte-cdk/python/source_declarative_manifest/main.py +++ b/airbyte-cdk/python/source_declarative_manifest/main.py @@ -11,7 +11,7 @@ from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource -from connector_builder.connector_builder_handler import ConnectorBuilderHandler +from connector_builder import connector_builder_handler def create_source(config: Mapping[str, Any]) -> DeclarativeSource: @@ -42,15 +42,26 @@ def preparse(args: List[str]) -> Tuple[str, str]: return parsed.command, parsed.config +def handle_connector_builder_request(source: DeclarativeSource, config: Mapping[str, Any]): + command = config.get("__command") + if command == "resolve_manifest": + return connector_builder_handler.resolve_manifest(source) + raise ValueError(f"Unrecognized command {command}.") + + +def handle_connector_request(source: DeclarativeSource, args: List[str]): + # Verify that the correct args are present for the production codepaths. + AirbyteEntrypoint.parse_args(args) + launch(source, sys.argv[1:]) + + def handle_request(args: List[str]): config = get_config_from_args(args) source = create_source(config) if "__command" in config: - ConnectorBuilderHandler(source).handle_request(config) + handle_connector_builder_request(source, config) else: - # Verify that the correct args are present for the production codepaths. - AirbyteEntrypoint.parse_args(args) - launch(source, sys.argv[1:]) + handle_connector_request(source, args) if __name__ == "__main__": diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py index e2713d6068c5..26e652df0faf 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py @@ -2,12 +2,8 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -import copy - -import pytest from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource -from airbyte_cdk.sources.declarative.parsers.custom_exceptions import UndefinedReferenceException -from connector_builder.connector_builder_handler import ConnectorBuilderHandler +from connector_builder.connector_builder_handler import resolve_manifest _stream_name = "stream_with_custom_requester" _stream_primary_key = "id" @@ -52,25 +48,9 @@ } -@pytest.mark.parametrize( - "command", - [ - pytest.param("asdf", id="test_arbitrary_command_error"), - pytest.param(None, id="test_command_is_none_error"), - pytest.param("", id="test_command_is_empty_error"), - ], -) -def test_invalid_command(command): - handler = ConnectorBuilderHandler(ManifestDeclarativeSource(MANIFEST)) - config = copy.deepcopy(CONFIG) - config["__command"] = command - with pytest.raises(ValueError): - handler.handle_request(config) - - def test_resolve_manifest(): - handler = ConnectorBuilderHandler(ManifestDeclarativeSource(MANIFEST)) - resolved_manifest = handler.handle_request(CONFIG) + source = ManifestDeclarativeSource(MANIFEST) + resolved_manifest = resolve_manifest(source) expected_resolved_manifest = { "type": "DeclarativeSource", @@ -193,14 +173,6 @@ class MockManifestDeclarativeSource: def resolved_manifest(self): raise ValueError - handler = ConnectorBuilderHandler(MockManifestDeclarativeSource()) - response = handler.handle_request(CONFIG) + source = MockManifestDeclarativeSource() + response = resolve_manifest(source) assert "Error resolving manifest" in response.trace.error.message - - -def test_resolve_manifest_cannot_instantiate_source(): - manifest = copy.deepcopy(MANIFEST) - manifest["streams"][0]["retriever"] = "#/definitions/retrieverasdf" - with pytest.raises(UndefinedReferenceException) as actual_exception: - ConnectorBuilderHandler(ManifestDeclarativeSource(manifest)) - assert "Undefined reference #/definitions/retriever" in actual_exception.value.args[0] diff --git a/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py b/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py index 788495dd04fb..7b6d75f42f28 100644 --- a/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py +++ b/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py @@ -9,7 +9,7 @@ import pytest import source_declarative_manifest from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource -from source_declarative_manifest.main import handle_request +from source_declarative_manifest.main import handle_connector_builder_request, handle_request CONFIG = { "__injected_declarative_manifest": { @@ -133,6 +133,22 @@ def test_given_injected_declarative_manifest_then_launch_with_declarative_manife def test_given_command_then_use_connector_builder_handler(config_file_with_command): - with mock.patch.object(source_declarative_manifest.main.ConnectorBuilderHandler, "handle_request") as patch: + with mock.patch.object(source_declarative_manifest.main, "handle_connector_builder_request") as patch: handle_request(["read", "--config", str(config_file_with_command)]) assert patch.call_count == 1 + + +@pytest.mark.parametrize( + "command", + [ + pytest.param("asdf", id="test_arbitrary_command_error"), + pytest.param(None, id="test_command_is_none_error"), + pytest.param("", id="test_command_is_empty_error"), + ], +) +def test_invalid_command(command): + config = copy.deepcopy(CONFIG) + config["__command"] = command + source = ManifestDeclarativeSource(CONFIG["__injected_declarative_manifest"]) + with pytest.raises(ValueError): + handle_connector_builder_request(source, config) From bd71e91d2e373f740b88882d855be3339f83c273 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Thu, 9 Mar 2023 14:36:38 -0800 Subject: [PATCH 06/71] read records --- .../source_declarative_manifest/main.py | 35 +++++++++++++++++-- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/airbyte-cdk/python/source_declarative_manifest/main.py b/airbyte-cdk/python/source_declarative_manifest/main.py index 2d66c822477d..676fa498637d 100644 --- a/airbyte-cdk/python/source_declarative_manifest/main.py +++ b/airbyte-cdk/python/source_declarative_manifest/main.py @@ -12,6 +12,8 @@ from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from connector_builder import connector_builder_handler +from airbyte_protocol.models.airbyte_protocol import ConfiguredAirbyteCatalog, ConfiguredAirbyteStream, SyncMode, DestinationSyncMode +import logging def create_source(config: Mapping[str, Any]) -> DeclarativeSource: @@ -34,20 +36,47 @@ def get_config_from_args(args: List[str]) -> Mapping[str, Any]: return config -def preparse(args: List[str]) -> Tuple[str, str]: +def preparse(args: List[str]) -> Tuple[str, str, str]: parser = argparse.ArgumentParser() parser.add_argument("command", type=str, help="Airbyte Protocol command") parser.add_argument("--config", type=str, required=True, help="path to the json configuration file") parsed, _ = parser.parse_known_args(args) return parsed.command, parsed.config - -def handle_connector_builder_request(source: DeclarativeSource, config: Mapping[str, Any]): +def create_configure_catalog(stream_name: str) -> ConfiguredAirbyteCatalog: + return ConfiguredAirbyteCatalog.parse_obj( + { + "streams": [ + { + "stream": { + "name": stream_name, + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"], + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + } + ] + } + ) + +def execute_command(source: DeclarativeSource, config: Mapping[str, Any]): command = config.get("__command") + command_config = config.get("__command_config") if command == "resolve_manifest": return connector_builder_handler.resolve_manifest(source) + elif command == "read": + stream_name = command_config["stream_name"] + configured_catalog = create_configure_catalog(stream_name) + logger = logging.getLogger(f"airbyte.{source.name}") + return source.read(logger, config, configured_catalog, None) raise ValueError(f"Unrecognized command {command}.") +def handle_connector_builder_request(source: DeclarativeSource, config: Mapping[str, Any]): + messages = execute_command(source, config) + for message in messages: + print(message.json(exclude_unset=True)) + def handle_connector_request(source: DeclarativeSource, args: List[str]): # Verify that the correct args are present for the production codepaths. From c6ac119ee59bbfc7e77db7c3a0f16a856711540b Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Thu, 9 Mar 2023 14:46:05 -0800 Subject: [PATCH 07/71] paste unit tests from connector builder server --- .../unit_tests/connector_builder/test_read.py | 475 ++++++++++++++++++ 1 file changed, 475 insertions(+) create mode 100644 airbyte-cdk/python/unit_tests/connector_builder/test_read.py diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py new file mode 100644 index 000000000000..6f63abfa583f --- /dev/null +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py @@ -0,0 +1,475 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +import asyncio +import json +from typing import Iterator +from unittest.mock import MagicMock + +import pytest +from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, AirbyteRecordMessage, Level, Type +from connector_builder.generated.models.http_request import HttpRequest +from connector_builder.generated.models.http_response import HttpResponse +from connector_builder.generated.models.resolve_manifest import ResolveManifest +from connector_builder.generated.models.resolve_manifest_request_body import ResolveManifestRequestBody +from connector_builder.generated.models.stream_read import StreamRead +from connector_builder.generated.models.stream_read_pages import StreamReadPages +from connector_builder.generated.models.stream_read_request_body import StreamReadRequestBody +from connector_builder.generated.models.streams_list_read import StreamsListRead +from connector_builder.generated.models.streams_list_read_streams import StreamsListReadStreams +from connector_builder.generated.models.streams_list_request_body import StreamsListRequestBody +from connector_builder.impl.default_api import DefaultApiImpl +from connector_builder.impl.low_code_cdk_adapter import LowCodeSourceAdapterFactory +from fastapi import HTTPException +from pydantic.error_wrappers import ValidationError + +MAX_PAGES_PER_SLICE = 4 +MAX_SLICES = 3 + +MANIFEST = { + "version": "0.1.0", + "type": "DeclarativeSource", + "definitions": { + "selector": {"extractor": {"field_path": ["items"], "type": "DpathExtractor"}, "type": "RecordSelector"}, + "requester": {"url_base": "https://demonslayers.com/api/v1/", "http_method": "GET", "type": "DeclarativeSource"}, + "retriever": { + "type": "DeclarativeSource", + "record_selector": {"extractor": {"field_path": ["items"], "type": "DpathExtractor"}, "type": "RecordSelector"}, + "paginator": {"type": "NoPagination"}, + "requester": {"url_base": "https://demonslayers.com/api/v1/", "http_method": "GET", "type": "HttpRequester"}, + }, + "hashiras_stream": { + "retriever": { + "type": "DeclarativeSource", + "record_selector": {"extractor": {"field_path": ["items"], "type": "DpathExtractor"}, "type": "RecordSelector"}, + "paginator": {"type": "NoPagination"}, + "requester": {"url_base": "https://demonslayers.com/api/v1/", "http_method": "GET", "type": "HttpRequester"}, + }, + "$parameters": {"name": "hashiras", "path": "/hashiras"}, + }, + "breathing_techniques_stream": { + "retriever": { + "type": "DeclarativeSource", + "record_selector": {"extractor": {"field_path": ["items"], "type": "DpathExtractor"}, "type": "RecordSelector"}, + "paginator": {"type": "NoPagination"}, + "requester": {"url_base": "https://demonslayers.com/api/v1/", "http_method": "GET", "type": "HttpRequester"}, + }, + "$parameters": {"name": "breathing-techniques", "path": "/breathing_techniques"}, + }, + }, + "streams": [ + { + "type": "DeclarativeStream", + "retriever": { + "type": "SimpleRetriever", + "record_selector": {"extractor": {"field_path": ["items"], "type": "DpathExtractor"}, "type": "RecordSelector"}, + "paginator": {"type": "NoPagination"}, + "requester": {"url_base": "https://demonslayers.com/api/v1/", "http_method": "GET", "type": "HttpRequester"}, + }, + "$parameters": {"name": "hashiras", "path": "/hashiras"}, + }, + { + "type": "DeclarativeStream", + "retriever": { + "type": "SimpleRetriever", + "record_selector": {"extractor": {"field_path": ["items"], "type": "DpathExtractor"}, "type": "RecordSelector"}, + "paginator": {"type": "NoPagination"}, + "requester": {"url_base": "https://demonslayers.com/api/v1/", "http_method": "GET", "type": "HttpRequester"}, + }, + "$parameters": {"name": "breathing-techniques", "path": "/breathing_techniques"}, + }, + ], + "check": {"stream_names": ["hashiras"], "type": "CheckStream"}, +} + +CONFIG = {"rank": "upper-six"} + +def test_read_stream(): + request = { + "url": "https://demonslayers.com/api/v1/hashiras?era=taisho", + "headers": {"Content-Type": "application/json"}, + "http_method": "GET", + "body": {"custom": "field"}, + } + response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}', "http_method": "GET"} + expected_schema = {"$schema": "http://json-schema.org/schema#", "properties": {"name": {"type": "string"}}, "type": "object"} + expected_pages = [ + StreamReadPages( + request=HttpRequest( + url="https://demonslayers.com/api/v1/hashiras", + parameters={"era": ["taisho"]}, + headers={"Content-Type": "application/json"}, + body={"custom": "field"}, + http_method="GET", + ), + response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'), + records=[{"name": "Shinobu Kocho"}, {"name": "Muichiro Tokito"}], + ), + StreamReadPages( + request=HttpRequest( + url="https://demonslayers.com/api/v1/hashiras", + parameters={"era": ["taisho"]}, + headers={"Content-Type": "application/json"}, + body={"custom": "field"}, + http_method="GET", + ), + response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'), + records=[{"name": "Mitsuri Kanroji"}], + ), + ] + + mock_source_adapter_cls = make_mock_adapter_factory( + iter( + [ + request_log_message(request), + response_log_message(response), + record_message("hashiras", {"name": "Shinobu Kocho"}), + record_message("hashiras", {"name": "Muichiro Tokito"}), + request_log_message(request), + response_log_message(response), + record_message("hashiras", {"name": "Mitsuri Kanroji"}), + ] + ) + ) + + api = DefaultApiImpl(mock_source_adapter_cls, MAX_PAGES_PER_SLICE, MAX_SLICES) + + loop = asyncio.get_event_loop() + actual_response: StreamRead = loop.run_until_complete( + api.read_stream(StreamReadRequestBody(manifest=MANIFEST, config=CONFIG, stream="hashiras")) + ) + assert actual_response.inferred_schema == expected_schema + + single_slice = actual_response.slices[0] + for i, actual_page in enumerate(single_slice.pages): + assert actual_page == expected_pages[i] + +def test_read_stream(): + request = { + "url": "https://demonslayers.com/api/v1/hashiras?era=taisho", + "headers": {"Content-Type": "application/json"}, + "http_method": "GET", + "body": {"custom": "field"}, + } + response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}', "http_method": "GET"} + expected_schema = {"$schema": "http://json-schema.org/schema#", "properties": {"name": {"type": "string"}}, "type": "object"} + expected_pages = [ + StreamReadPages( + request=HttpRequest( + url="https://demonslayers.com/api/v1/hashiras", + parameters={"era": ["taisho"]}, + headers={"Content-Type": "application/json"}, + body={"custom": "field"}, + http_method="GET", + ), + response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'), + records=[{"name": "Shinobu Kocho"}, {"name": "Muichiro Tokito"}], + ), + StreamReadPages( + request=HttpRequest( + url="https://demonslayers.com/api/v1/hashiras", + parameters={"era": ["taisho"]}, + headers={"Content-Type": "application/json"}, + body={"custom": "field"}, + http_method="GET", + ), + response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'), + records=[{"name": "Mitsuri Kanroji"}], + ), + ] + + mock_source_adapter_cls = make_mock_adapter_factory( + iter( + [ + request_log_message(request), + response_log_message(response), + record_message("hashiras", {"name": "Shinobu Kocho"}), + record_message("hashiras", {"name": "Muichiro Tokito"}), + request_log_message(request), + response_log_message(response), + record_message("hashiras", {"name": "Mitsuri Kanroji"}), + ] + ) + ) + + api = DefaultApiImpl(mock_source_adapter_cls, MAX_PAGES_PER_SLICE, MAX_SLICES) + + loop = asyncio.get_event_loop() + actual_response: StreamRead = loop.run_until_complete( + api.read_stream(StreamReadRequestBody(manifest=MANIFEST, config=CONFIG, stream="hashiras")) + ) + assert actual_response.inferred_schema == expected_schema + + single_slice = actual_response.slices[0] + for i, actual_page in enumerate(single_slice.pages): + assert actual_page == expected_pages[i] + + +def test_read_stream_with_logs(): + request = { + "url": "https://demonslayers.com/api/v1/hashiras?era=taisho", + "headers": {"Content-Type": "application/json"}, + "body": {"custom": "field"}, + "http_method": "GET", + } + response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'} + expected_pages = [ + StreamReadPages( + request=HttpRequest( + url="https://demonslayers.com/api/v1/hashiras", + parameters={"era": ["taisho"]}, + headers={"Content-Type": "application/json"}, + body={"custom": "field"}, + http_method="GET", + ), + response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'), + records=[{"name": "Shinobu Kocho"}, {"name": "Muichiro Tokito"}], + ), + StreamReadPages( + request=HttpRequest( + url="https://demonslayers.com/api/v1/hashiras", + parameters={"era": ["taisho"]}, + headers={"Content-Type": "application/json"}, + body={"custom": "field"}, + http_method="GET", + ), + response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'), + records=[{"name": "Mitsuri Kanroji"}], + ), + ] + expected_logs = [ + {"message": "log message before the request"}, + {"message": "log message during the page"}, + {"message": "log message after the response"}, + ] + + mock_source_adapter_cls = make_mock_adapter_factory( + iter( + [ + AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message="log message before the request")), + request_log_message(request), + response_log_message(response), + record_message("hashiras", {"name": "Shinobu Kocho"}), + AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message="log message during the page")), + record_message("hashiras", {"name": "Muichiro Tokito"}), + AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message="log message after the response")), + ] + ) + ) + + api = DefaultApiImpl(mock_source_adapter_cls, MAX_PAGES_PER_SLICE, MAX_SLICES) + + loop = asyncio.get_event_loop() + actual_response: StreamRead = loop.run_until_complete( + api.read_stream(StreamReadRequestBody(manifest=MANIFEST, config=CONFIG, stream="hashiras")) + ) + + single_slice = actual_response.slices[0] + for i, actual_page in enumerate(single_slice.pages): + assert actual_page == expected_pages[i] + + for i, actual_log in enumerate(actual_response.logs): + assert actual_log == expected_logs[i] + + +@pytest.mark.parametrize( + "request_record_limit, max_record_limit", + [ + pytest.param(1, 3, id="test_create_request_with_record_limit"), + pytest.param(3, 1, id="test_create_request_record_limit_exceeds_max"), + ], +) +def test_read_stream_record_limit(request_record_limit, max_record_limit): + request = { + "url": "https://demonslayers.com/api/v1/hashiras?era=taisho", + "headers": {"Content-Type": "application/json"}, + "body": {"custom": "field"}, + } + response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'} + mock_source_adapter_cls = make_mock_adapter_factory( + iter( + [ + request_log_message(request), + response_log_message(response), + record_message("hashiras", {"name": "Shinobu Kocho"}), + record_message("hashiras", {"name": "Muichiro Tokito"}), + request_log_message(request), + response_log_message(response), + record_message("hashiras", {"name": "Mitsuri Kanroji"}), + response_log_message(response), + ] + ) + ) + n_records = 2 + record_limit = min(request_record_limit, max_record_limit) + + api = DefaultApiImpl(mock_source_adapter_cls, MAX_PAGES_PER_SLICE, MAX_SLICES, max_record_limit=max_record_limit) + loop = asyncio.get_event_loop() + actual_response: StreamRead = loop.run_until_complete( + api.read_stream(StreamReadRequestBody(manifest=MANIFEST, config=CONFIG, stream="hashiras", record_limit=request_record_limit)) + ) + single_slice = actual_response.slices[0] + total_records = 0 + for i, actual_page in enumerate(single_slice.pages): + total_records += len(actual_page.records) + assert total_records == min([record_limit, n_records]) + + +@pytest.mark.parametrize( + "max_record_limit", + [ + pytest.param(2, id="test_create_request_no_record_limit"), + pytest.param(1, id="test_create_request_no_record_limit_n_records_exceed_max"), + ], +) +def test_read_stream_default_record_limit(max_record_limit): + request = { + "url": "https://demonslayers.com/api/v1/hashiras?era=taisho", + "headers": {"Content-Type": "application/json"}, + "body": {"custom": "field"}, + } + response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'} + mock_source_adapter_cls = make_mock_adapter_factory( + iter( + [ + request_log_message(request), + response_log_message(response), + record_message("hashiras", {"name": "Shinobu Kocho"}), + record_message("hashiras", {"name": "Muichiro Tokito"}), + request_log_message(request), + response_log_message(response), + record_message("hashiras", {"name": "Mitsuri Kanroji"}), + response_log_message(response), + ] + ) + ) + n_records = 2 + + api = DefaultApiImpl(mock_source_adapter_cls, MAX_PAGES_PER_SLICE, MAX_SLICES, max_record_limit=max_record_limit) + loop = asyncio.get_event_loop() + actual_response: StreamRead = loop.run_until_complete( + api.read_stream(StreamReadRequestBody(manifest=MANIFEST, config=CONFIG, stream="hashiras")) + ) + single_slice = actual_response.slices[0] + total_records = 0 + for i, actual_page in enumerate(single_slice.pages): + total_records += len(actual_page.records) + assert total_records == min([max_record_limit, n_records]) + + +def test_read_stream_limit_0(): + request = { + "url": "https://demonslayers.com/api/v1/hashiras?era=taisho", + "headers": {"Content-Type": "application/json"}, + "body": {"custom": "field"}, + } + response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'} + mock_source_adapter_cls = make_mock_adapter_factory( + iter( + [ + request_log_message(request), + response_log_message(response), + record_message("hashiras", {"name": "Shinobu Kocho"}), + record_message("hashiras", {"name": "Muichiro Tokito"}), + request_log_message(request), + response_log_message(response), + record_message("hashiras", {"name": "Mitsuri Kanroji"}), + response_log_message(response), + ] + ) + ) + api = DefaultApiImpl(mock_source_adapter_cls, MAX_PAGES_PER_SLICE, MAX_SLICES) + loop = asyncio.get_event_loop() + + with pytest.raises(ValidationError): + loop.run_until_complete(api.read_stream(StreamReadRequestBody(manifest=MANIFEST, config=CONFIG, stream="hashiras", record_limit=0))) + loop.run_until_complete(api.read_stream(StreamReadRequestBody(manifest=MANIFEST, config=CONFIG, stream="hashiras"))) + + +def test_read_stream_no_records(): + request = { + "url": "https://demonslayers.com/api/v1/hashiras?era=taisho", + "headers": {"Content-Type": "application/json"}, + "body": {"custom": "field"}, + "http_method": "GET", + } + response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'} + expected_pages = [ + StreamReadPages( + request=HttpRequest( + url="https://demonslayers.com/api/v1/hashiras", + parameters={"era": ["taisho"]}, + headers={"Content-Type": "application/json"}, + body={"custom": "field"}, + http_method="GET", + ), + response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'), + records=[], + ), + StreamReadPages( + request=HttpRequest( + url="https://demonslayers.com/api/v1/hashiras", + parameters={"era": ["taisho"]}, + headers={"Content-Type": "application/json"}, + body={"custom": "field"}, + http_method="GET", + ), + response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'), + records=[], + ), + ] + + mock_source_adapter_cls = make_mock_adapter_factory( + iter( + [ + request_log_message(request), + response_log_message(response), + request_log_message(request), + response_log_message(response), + ] + ) + ) + + api = DefaultApiImpl(mock_source_adapter_cls, MAX_PAGES_PER_SLICE, MAX_SLICES) + + loop = asyncio.get_event_loop() + actual_response: StreamRead = loop.run_until_complete( + api.read_stream(StreamReadRequestBody(manifest=MANIFEST, config=CONFIG, stream="hashiras")) + ) + + single_slice = actual_response.slices[0] + for i, actual_page in enumerate(single_slice.pages): + assert actual_page == expected_pages[i] + +def test_read_stream_invalid_group_format(): + response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'} + + mock_source_adapter_cls = make_mock_adapter_factory( + iter( + [ + response_log_message(response), + record_message("hashiras", {"name": "Shinobu Kocho"}), + record_message("hashiras", {"name": "Muichiro Tokito"}), + ] + ) + ) + + api = DefaultApiImpl(mock_source_adapter_cls, MAX_PAGES_PER_SLICE, MAX_SLICES) + + loop = asyncio.get_event_loop() + with pytest.raises(HTTPException) as actual_exception: + loop.run_until_complete(api.read_stream(StreamReadRequestBody(manifest=MANIFEST, config=CONFIG, stream="hashiras"))) + + assert actual_exception.value.status_code == 400 + + +def test_read_stream_returns_error_if_stream_does_not_exist(): + expected_status_code = 400 + + api = DefaultApiImpl(LowCodeSourceAdapterFactory(MAX_PAGES_PER_SLICE, MAX_SLICES), MAX_PAGES_PER_SLICE, MAX_SLICES) + loop = asyncio.get_event_loop() + with pytest.raises(HTTPException) as actual_exception: + loop.run_until_complete(api.read_stream(StreamReadRequestBody(manifest=MANIFEST, config={}, stream="not_in_manifest"))) + + assert actual_exception.value.status_code == expected_status_code From f949521c98c7897d378d22a037b5b7c4fa49b6b1 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Thu, 9 Mar 2023 15:40:41 -0800 Subject: [PATCH 08/71] compiles but tests fail --- .../connector_builder_handler.py | 267 ++++++++++++- .../connector_builder/message_grouper.py | 10 + .../source_declarative_manifest/main.py | 16 - .../unit_tests/connector_builder/test_read.py | 368 ++---------------- 4 files changed, 297 insertions(+), 364 deletions(-) create mode 100644 airbyte-cdk/python/connector_builder/message_grouper.py diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index a1d8f6a6f647..e907d32e0ac3 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -3,18 +3,260 @@ # from datetime import datetime -from typing import Union +from typing import List -from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage +from airbyte_cdk.models import AirbyteRecordMessage +from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource from airbyte_cdk.utils.traced_exception import AirbyteTracedException +from dataclasses import asdict, dataclass +import json +from json import JSONDecodeError +from typing import Any, Dict, Iterable, Iterator, Optional, Union +from urllib.parse import parse_qs, urlparse + +from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Type +from airbyte_cdk.utils.schema_inferrer import SchemaInferrer +import logging +from airbyte_protocol.models.airbyte_protocol import ConfiguredAirbyteCatalog, ConfiguredAirbyteStream, SyncMode, DestinationSyncMode + +logger = logging.getLogger("airbyte.connector-builder") + +@dataclass +class HttpResponse: + status: int + body: Optional[str] + headers: Optional[Dict[str, Any]] + +@dataclass +class HttpRequest: + url: str + parameters: Optional[Dict[str, Any]] + body: Optional[Dict[str, Any]] + headers: Optional[Dict[str, Any]] + http_method: str +@dataclass +class StreamReadPages: + records: List[object] + request: Optional[HttpRequest] = None + response: Optional[HttpResponse] = None + +@dataclass +class StreamReadSlicesInnerPagesInner: + + records: List[object] + request: Optional[HttpRequest] + response: Optional[HttpResponse] + +@dataclass +class StreamReadSlicesInnerSliceDescriptor: + start_datetime: Optional[datetime] + list_item: Optional[str] + +@dataclass +class StreamReadSlicesInner: + pages: List[StreamReadSlicesInnerPagesInner] + slice_descriptor: Optional[StreamReadSlicesInnerSliceDescriptor] + state: Optional[Dict[str, Any]] + +@dataclass +class StreamRead(object): + logs: List[object] + slices: List[StreamReadSlicesInner] + test_read_limit_reached: bool + inferred_schema: Optional[Dict[str, Any]] + +@dataclass +class StreamReadRequestBody: + manifest: Dict[str, Any] + stream: str + config: Dict[str, Any] + state: Optional[Dict[str, Any]] + record_limit: Optional[int] + +#FIXME: can dataclasses also have validators? +""" + @validator("record_limit") + def record_limit_max(cls, value): + assert value <= 1000 + return value + + @validator("record_limit") + def record_limit_min(cls, value): + assert value >= 1 + return value +""" + +@dataclass +class StreamReadSliceDescriptor: + start_datetime: Optional[datetime] = None + list_item: Optional[str] = None + +@dataclass +class StreamReadSlices: + pages: List[StreamReadPages] + slice_descriptor: Optional[StreamReadSliceDescriptor] = None + state: Optional[Dict[str, Any]] = None def list_streams() -> AirbyteRecordMessage: raise NotImplementedError -def stream_read() -> AirbyteRecordMessage: - raise NotImplementedError + +class ConnectorBuilderHandler: + def __init__(self, max_pages_per_slice: int, max_slices: int, max_record_limit: int = 1000): + self._max_pages_per_slice = max_pages_per_slice + self._max_slices = max_slices + self.max_record_limit = max_record_limit + def read_stream( + self, + source: DeclarativeSource, + config: Dict[str, Any], + stream: str, + record_limit: Optional[int] = None, + ) -> AirbyteMessage: + schema_inferrer = SchemaInferrer() + + if record_limit is None: + record_limit = self.max_record_limit + else: + record_limit = min(record_limit, self.max_record_limit) + + slices = [] + log_messages = [] + state = {} # No support for incremental sync + catalog = _create_configure_catalog(stream) + for message_group in self._get_message_groups( + source.read(logger, config, catalog, state), + schema_inferrer, + record_limit, + ): + if isinstance(message_group, AirbyteLogMessage): + log_messages.append({"message": message_group.message}) + else: + slices.append(message_group) + + return AirbyteMessage(type=Type.RECORD, record=AirbyteRecordMessage( + stream="_test_read", + emitted_at=_emitted_at(), + data=asdict(StreamRead( + logs=log_messages, + slices=slices, + test_read_limit_reached=self._has_reached_limit(slices), + inferred_schema=schema_inferrer.get_stream_schema(stream), + )))) + + def _get_message_groups( + self, messages: Iterator[AirbyteMessage], schema_inferrer: SchemaInferrer, limit: int + ) -> Iterable[Union[StreamReadPages, AirbyteLogMessage]]: + """ + Message groups are partitioned according to when request log messages are received. Subsequent response log messages + and record messages belong to the prior request log message and when we encounter another request, append the latest + message group, until records have been read. + + Messages received from the CDK read operation will always arrive in the following order: + {type: LOG, log: {message: "request: ..."}} + {type: LOG, log: {message: "response: ..."}} + ... 0 or more record messages + {type: RECORD, record: {data: ...}} + {type: RECORD, record: {data: ...}} + Repeats for each request/response made + + Note: The exception is that normal log messages can be received at any time which are not incorporated into grouping + """ + records_count = 0 + at_least_one_page_in_group = False + current_page_records = [] + current_slice_pages = [] + current_page_request: Optional[HttpRequest] = None + current_page_response: Optional[HttpResponse] = None + + while records_count < limit and (message := next(messages, None)): + if self._need_to_close_page(at_least_one_page_in_group, message): + self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records) + current_page_request = None + current_page_response = None + + if at_least_one_page_in_group and message.type == Type.LOG and message.log.message.startswith("slice:"): + yield StreamReadSlices(pages=current_slice_pages) + current_slice_pages = [] + at_least_one_page_in_group = False + elif message.type == Type.LOG and message.log.message.startswith("request:"): + if not at_least_one_page_in_group: + at_least_one_page_in_group = True + current_page_request = self._create_request_from_log_message(message.log) + elif message.type == Type.LOG and message.log.message.startswith("response:"): + current_page_response = self._create_response_from_log_message(message.log) + elif message.type == Type.LOG: + yield message.log + elif message.type == Type.RECORD: + current_page_records.append(message.record.data) + records_count += 1 + schema_inferrer.accumulate(message.record) + else: + self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records) + yield StreamReadSlices(pages=current_slice_pages) + + @staticmethod + def _need_to_close_page(at_least_one_page_in_group, message): + return ( + at_least_one_page_in_group + and message.type == Type.LOG + and (message.log.message.startswith("request:") or message.log.message.startswith("slice:")) + ) + + @staticmethod + def _close_page(current_page_request, current_page_response, current_slice_pages, current_page_records): + if not current_page_request or not current_page_response: + raise ValueError("Every message grouping should have at least one request and response") + + current_slice_pages.append( + StreamReadPages(request=current_page_request, response=current_page_response, records=current_page_records) + ) + current_page_records.clear() + + def _create_request_from_log_message(self, log_message: AirbyteLogMessage) -> Optional[HttpRequest]: + # TODO: As a temporary stopgap, the CDK emits request data as a log message string. Ideally this should come in the + # form of a custom message object defined in the Airbyte protocol, but this unblocks us in the immediate while the + # protocol change is worked on. + raw_request = log_message.message.partition("request:")[2] + try: + request = json.loads(raw_request) + url = urlparse(request.get("url", "")) + full_path = f"{url.scheme}://{url.hostname}{url.path}" if url else "" + parameters = parse_qs(url.query) or None + return HttpRequest( + url=full_path, + http_method=request.get("http_method", ""), + headers=request.get("headers"), + parameters=parameters, + body=request.get("body"), + ) + except JSONDecodeError as error: + self.logger.warning(f"Failed to parse log message into request object with error: {error}") + return None + + def _create_response_from_log_message(self, log_message: AirbyteLogMessage) -> Optional[HttpResponse]: + # TODO: As a temporary stopgap, the CDK emits response data as a log message string. Ideally this should come in the + # form of a custom message object defined in the Airbyte protocol, but this unblocks us in the immediate while the + # protocol change is worked on. + raw_response = log_message.message.partition("response:")[2] + try: + response = json.loads(raw_response) + body = response.get("body", "{}") + return HttpResponse(status=response.get("status_code"), body=body, headers=response.get("headers")) + except JSONDecodeError as error: + self.logger.warning(f"Failed to parse log message into response object with error: {error}") + return None + + def _has_reached_limit(self, slices): + if len(slices) >= self._max_slices: + return True + + for slice in slices: + if len(slice.pages) >= self._max_pages_per_slice: + return True + return False def resolve_manifest(source) -> Union[AirbyteMessage, AirbyteRecordMessage]: @@ -31,3 +273,20 @@ def resolve_manifest(source) -> Union[AirbyteMessage, AirbyteRecordMessage]: def _emitted_at(): return int(datetime.now().timestamp()) * 1000 + +def _create_configure_catalog(stream_name: str) -> ConfiguredAirbyteCatalog: + return ConfiguredAirbyteCatalog.parse_obj( + { + "streams": [ + { + "stream": { + "name": stream_name, + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"], + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + } + ] + } + ) diff --git a/airbyte-cdk/python/connector_builder/message_grouper.py b/airbyte-cdk/python/connector_builder/message_grouper.py new file mode 100644 index 000000000000..726efe114c31 --- /dev/null +++ b/airbyte-cdk/python/connector_builder/message_grouper.py @@ -0,0 +1,10 @@ +from typing import Iterable, Iterator + +from airbyte_cdk.models import AirbyteMessage +from airbyte_cdk.utils.schema_inferrer import SchemaInferrer + + +class MessageGrouper: + + def get_message_groups(self, messages: Iterator[AirbyteMessage], schema_inferrer: SchemaInferrer, limit: int) -> Iterable: #FIXME: set right return type + pass diff --git a/airbyte-cdk/python/source_declarative_manifest/main.py b/airbyte-cdk/python/source_declarative_manifest/main.py index 676fa498637d..d8db69c2c2a0 100644 --- a/airbyte-cdk/python/source_declarative_manifest/main.py +++ b/airbyte-cdk/python/source_declarative_manifest/main.py @@ -43,22 +43,6 @@ def preparse(args: List[str]) -> Tuple[str, str, str]: parsed, _ = parser.parse_known_args(args) return parsed.command, parsed.config -def create_configure_catalog(stream_name: str) -> ConfiguredAirbyteCatalog: - return ConfiguredAirbyteCatalog.parse_obj( - { - "streams": [ - { - "stream": { - "name": stream_name, - "json_schema": {}, - "supported_sync_modes": ["full_refresh", "incremental"], - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite", - } - ] - } - ) def execute_command(source: DeclarativeSource, config: Mapping[str, Any]): command = config.get("__command") diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py index 6f63abfa583f..c4f863ab98d3 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py @@ -4,26 +4,14 @@ import asyncio import json -from typing import Iterator from unittest.mock import MagicMock import pytest -from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, AirbyteRecordMessage, Level, Type -from connector_builder.generated.models.http_request import HttpRequest -from connector_builder.generated.models.http_response import HttpResponse -from connector_builder.generated.models.resolve_manifest import ResolveManifest -from connector_builder.generated.models.resolve_manifest_request_body import ResolveManifestRequestBody -from connector_builder.generated.models.stream_read import StreamRead -from connector_builder.generated.models.stream_read_pages import StreamReadPages -from connector_builder.generated.models.stream_read_request_body import StreamReadRequestBody -from connector_builder.generated.models.streams_list_read import StreamsListRead -from connector_builder.generated.models.streams_list_read_streams import StreamsListReadStreams -from connector_builder.generated.models.streams_list_request_body import StreamsListRequestBody -from connector_builder.impl.default_api import DefaultApiImpl -from connector_builder.impl.low_code_cdk_adapter import LowCodeSourceAdapterFactory -from fastapi import HTTPException from pydantic.error_wrappers import ValidationError +from airbyte_cdk.models import Level, Type +from connector_builder.connector_builder_handler import * + MAX_PAGES_PER_SLICE = 4 MAX_SLICES = 3 @@ -119,67 +107,7 @@ def test_read_stream(): ), ] - mock_source_adapter_cls = make_mock_adapter_factory( - iter( - [ - request_log_message(request), - response_log_message(response), - record_message("hashiras", {"name": "Shinobu Kocho"}), - record_message("hashiras", {"name": "Muichiro Tokito"}), - request_log_message(request), - response_log_message(response), - record_message("hashiras", {"name": "Mitsuri Kanroji"}), - ] - ) - ) - - api = DefaultApiImpl(mock_source_adapter_cls, MAX_PAGES_PER_SLICE, MAX_SLICES) - - loop = asyncio.get_event_loop() - actual_response: StreamRead = loop.run_until_complete( - api.read_stream(StreamReadRequestBody(manifest=MANIFEST, config=CONFIG, stream="hashiras")) - ) - assert actual_response.inferred_schema == expected_schema - - single_slice = actual_response.slices[0] - for i, actual_page in enumerate(single_slice.pages): - assert actual_page == expected_pages[i] - -def test_read_stream(): - request = { - "url": "https://demonslayers.com/api/v1/hashiras?era=taisho", - "headers": {"Content-Type": "application/json"}, - "http_method": "GET", - "body": {"custom": "field"}, - } - response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}', "http_method": "GET"} - expected_schema = {"$schema": "http://json-schema.org/schema#", "properties": {"name": {"type": "string"}}, "type": "object"} - expected_pages = [ - StreamReadPages( - request=HttpRequest( - url="https://demonslayers.com/api/v1/hashiras", - parameters={"era": ["taisho"]}, - headers={"Content-Type": "application/json"}, - body={"custom": "field"}, - http_method="GET", - ), - response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'), - records=[{"name": "Shinobu Kocho"}, {"name": "Muichiro Tokito"}], - ), - StreamReadPages( - request=HttpRequest( - url="https://demonslayers.com/api/v1/hashiras", - parameters={"era": ["taisho"]}, - headers={"Content-Type": "application/json"}, - body={"custom": "field"}, - http_method="GET", - ), - response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'), - records=[{"name": "Mitsuri Kanroji"}], - ), - ] - - mock_source_adapter_cls = make_mock_adapter_factory( + mock_source = make_mock_source( iter( [ request_log_message(request), @@ -193,283 +121,35 @@ def test_read_stream(): ) ) - api = DefaultApiImpl(mock_source_adapter_cls, MAX_PAGES_PER_SLICE, MAX_SLICES) + connector_builder_handler = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES) + actual_response: AirbyteMessage = connector_builder_handler.read_stream(source=mock_source, config=CONFIG, stream="hashiras") + record = actual_response.record + stream_read_object: StreamRead = StreamRead(**record.data) + stream_read_object.slices = [StreamReadSlicesInner(**s) for s in stream_read_object.slices] + print(stream_read_object) + assert stream_read_object.inferred_schema == expected_schema - loop = asyncio.get_event_loop() - actual_response: StreamRead = loop.run_until_complete( - api.read_stream(StreamReadRequestBody(manifest=MANIFEST, config=CONFIG, stream="hashiras")) - ) - assert actual_response.inferred_schema == expected_schema - - single_slice = actual_response.slices[0] + single_slice = stream_read_object.slices[0] for i, actual_page in enumerate(single_slice.pages): assert actual_page == expected_pages[i] +def make_mock_source(return_value: Iterator) -> MagicMock: + mock_source = MagicMock() + mock_source.read.return_value = return_value + return mock_source -def test_read_stream_with_logs(): - request = { - "url": "https://demonslayers.com/api/v1/hashiras?era=taisho", - "headers": {"Content-Type": "application/json"}, - "body": {"custom": "field"}, - "http_method": "GET", - } - response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'} - expected_pages = [ - StreamReadPages( - request=HttpRequest( - url="https://demonslayers.com/api/v1/hashiras", - parameters={"era": ["taisho"]}, - headers={"Content-Type": "application/json"}, - body={"custom": "field"}, - http_method="GET", - ), - response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'), - records=[{"name": "Shinobu Kocho"}, {"name": "Muichiro Tokito"}], - ), - StreamReadPages( - request=HttpRequest( - url="https://demonslayers.com/api/v1/hashiras", - parameters={"era": ["taisho"]}, - headers={"Content-Type": "application/json"}, - body={"custom": "field"}, - http_method="GET", - ), - response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'), - records=[{"name": "Mitsuri Kanroji"}], - ), - ] - expected_logs = [ - {"message": "log message before the request"}, - {"message": "log message during the page"}, - {"message": "log message after the response"}, - ] - - mock_source_adapter_cls = make_mock_adapter_factory( - iter( - [ - AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message="log message before the request")), - request_log_message(request), - response_log_message(response), - record_message("hashiras", {"name": "Shinobu Kocho"}), - AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message="log message during the page")), - record_message("hashiras", {"name": "Muichiro Tokito"}), - AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message="log message after the response")), - ] - ) - ) - - api = DefaultApiImpl(mock_source_adapter_cls, MAX_PAGES_PER_SLICE, MAX_SLICES) - - loop = asyncio.get_event_loop() - actual_response: StreamRead = loop.run_until_complete( - api.read_stream(StreamReadRequestBody(manifest=MANIFEST, config=CONFIG, stream="hashiras")) - ) - - single_slice = actual_response.slices[0] - for i, actual_page in enumerate(single_slice.pages): - assert actual_page == expected_pages[i] - - for i, actual_log in enumerate(actual_response.logs): - assert actual_log == expected_logs[i] - - -@pytest.mark.parametrize( - "request_record_limit, max_record_limit", - [ - pytest.param(1, 3, id="test_create_request_with_record_limit"), - pytest.param(3, 1, id="test_create_request_record_limit_exceeds_max"), - ], -) -def test_read_stream_record_limit(request_record_limit, max_record_limit): - request = { - "url": "https://demonslayers.com/api/v1/hashiras?era=taisho", - "headers": {"Content-Type": "application/json"}, - "body": {"custom": "field"}, - } - response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'} - mock_source_adapter_cls = make_mock_adapter_factory( - iter( - [ - request_log_message(request), - response_log_message(response), - record_message("hashiras", {"name": "Shinobu Kocho"}), - record_message("hashiras", {"name": "Muichiro Tokito"}), - request_log_message(request), - response_log_message(response), - record_message("hashiras", {"name": "Mitsuri Kanroji"}), - response_log_message(response), - ] - ) - ) - n_records = 2 - record_limit = min(request_record_limit, max_record_limit) - - api = DefaultApiImpl(mock_source_adapter_cls, MAX_PAGES_PER_SLICE, MAX_SLICES, max_record_limit=max_record_limit) - loop = asyncio.get_event_loop() - actual_response: StreamRead = loop.run_until_complete( - api.read_stream(StreamReadRequestBody(manifest=MANIFEST, config=CONFIG, stream="hashiras", record_limit=request_record_limit)) - ) - single_slice = actual_response.slices[0] - total_records = 0 - for i, actual_page in enumerate(single_slice.pages): - total_records += len(actual_page.records) - assert total_records == min([record_limit, n_records]) - - -@pytest.mark.parametrize( - "max_record_limit", - [ - pytest.param(2, id="test_create_request_no_record_limit"), - pytest.param(1, id="test_create_request_no_record_limit_n_records_exceed_max"), - ], -) -def test_read_stream_default_record_limit(max_record_limit): - request = { - "url": "https://demonslayers.com/api/v1/hashiras?era=taisho", - "headers": {"Content-Type": "application/json"}, - "body": {"custom": "field"}, - } - response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'} - mock_source_adapter_cls = make_mock_adapter_factory( - iter( - [ - request_log_message(request), - response_log_message(response), - record_message("hashiras", {"name": "Shinobu Kocho"}), - record_message("hashiras", {"name": "Muichiro Tokito"}), - request_log_message(request), - response_log_message(response), - record_message("hashiras", {"name": "Mitsuri Kanroji"}), - response_log_message(response), - ] - ) - ) - n_records = 2 - - api = DefaultApiImpl(mock_source_adapter_cls, MAX_PAGES_PER_SLICE, MAX_SLICES, max_record_limit=max_record_limit) - loop = asyncio.get_event_loop() - actual_response: StreamRead = loop.run_until_complete( - api.read_stream(StreamReadRequestBody(manifest=MANIFEST, config=CONFIG, stream="hashiras")) - ) - single_slice = actual_response.slices[0] - total_records = 0 - for i, actual_page in enumerate(single_slice.pages): - total_records += len(actual_page.records) - assert total_records == min([max_record_limit, n_records]) - - -def test_read_stream_limit_0(): - request = { - "url": "https://demonslayers.com/api/v1/hashiras?era=taisho", - "headers": {"Content-Type": "application/json"}, - "body": {"custom": "field"}, - } - response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'} - mock_source_adapter_cls = make_mock_adapter_factory( - iter( - [ - request_log_message(request), - response_log_message(response), - record_message("hashiras", {"name": "Shinobu Kocho"}), - record_message("hashiras", {"name": "Muichiro Tokito"}), - request_log_message(request), - response_log_message(response), - record_message("hashiras", {"name": "Mitsuri Kanroji"}), - response_log_message(response), - ] - ) - ) - api = DefaultApiImpl(mock_source_adapter_cls, MAX_PAGES_PER_SLICE, MAX_SLICES) - loop = asyncio.get_event_loop() - - with pytest.raises(ValidationError): - loop.run_until_complete(api.read_stream(StreamReadRequestBody(manifest=MANIFEST, config=CONFIG, stream="hashiras", record_limit=0))) - loop.run_until_complete(api.read_stream(StreamReadRequestBody(manifest=MANIFEST, config=CONFIG, stream="hashiras"))) - - -def test_read_stream_no_records(): - request = { - "url": "https://demonslayers.com/api/v1/hashiras?era=taisho", - "headers": {"Content-Type": "application/json"}, - "body": {"custom": "field"}, - "http_method": "GET", - } - response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'} - expected_pages = [ - StreamReadPages( - request=HttpRequest( - url="https://demonslayers.com/api/v1/hashiras", - parameters={"era": ["taisho"]}, - headers={"Content-Type": "application/json"}, - body={"custom": "field"}, - http_method="GET", - ), - response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'), - records=[], - ), - StreamReadPages( - request=HttpRequest( - url="https://demonslayers.com/api/v1/hashiras", - parameters={"era": ["taisho"]}, - headers={"Content-Type": "application/json"}, - body={"custom": "field"}, - http_method="GET", - ), - response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'), - records=[], - ), - ] - - mock_source_adapter_cls = make_mock_adapter_factory( - iter( - [ - request_log_message(request), - response_log_message(response), - request_log_message(request), - response_log_message(response), - ] - ) - ) - - api = DefaultApiImpl(mock_source_adapter_cls, MAX_PAGES_PER_SLICE, MAX_SLICES) - - loop = asyncio.get_event_loop() - actual_response: StreamRead = loop.run_until_complete( - api.read_stream(StreamReadRequestBody(manifest=MANIFEST, config=CONFIG, stream="hashiras")) - ) - - single_slice = actual_response.slices[0] - for i, actual_page in enumerate(single_slice.pages): - assert actual_page == expected_pages[i] - -def test_read_stream_invalid_group_format(): - response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'} - - mock_source_adapter_cls = make_mock_adapter_factory( - iter( - [ - response_log_message(response), - record_message("hashiras", {"name": "Shinobu Kocho"}), - record_message("hashiras", {"name": "Muichiro Tokito"}), - ] - ) - ) +def request_log_message(request: dict) -> AirbyteMessage: + return AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message=f"request:{json.dumps(request)}")) - api = DefaultApiImpl(mock_source_adapter_cls, MAX_PAGES_PER_SLICE, MAX_SLICES) - loop = asyncio.get_event_loop() - with pytest.raises(HTTPException) as actual_exception: - loop.run_until_complete(api.read_stream(StreamReadRequestBody(manifest=MANIFEST, config=CONFIG, stream="hashiras"))) +def response_log_message(response: dict) -> AirbyteMessage: + return AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message=f"response:{json.dumps(response)}")) - assert actual_exception.value.status_code == 400 +def record_message(stream: str, data: dict) -> AirbyteMessage: + return AirbyteMessage(type=Type.RECORD, record=AirbyteRecordMessage(stream=stream, data=data, emitted_at=1234)) -def test_read_stream_returns_error_if_stream_does_not_exist(): - expected_status_code = 400 - api = DefaultApiImpl(LowCodeSourceAdapterFactory(MAX_PAGES_PER_SLICE, MAX_SLICES), MAX_PAGES_PER_SLICE, MAX_SLICES) - loop = asyncio.get_event_loop() - with pytest.raises(HTTPException) as actual_exception: - loop.run_until_complete(api.read_stream(StreamReadRequestBody(manifest=MANIFEST, config={}, stream="not_in_manifest"))) +def slice_message() -> AirbyteMessage: + return AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message='slice:{"key": "value"}')) - assert actual_exception.value.status_code == expected_status_code From 45741cdfd3d486d19441192ae34b1b9224358194 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Thu, 9 Mar 2023 15:53:02 -0800 Subject: [PATCH 09/71] first test passes --- .../connector_builder_handler.py | 4 +- .../unit_tests/connector_builder/test_read.py | 45 +++++++++---------- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index e907d32e0ac3..86a98969c27e 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -10,6 +10,7 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException from dataclasses import asdict, dataclass +from copy import deepcopy import json from json import JSONDecodeError from typing import Any, Dict, Iterable, Iterator, Optional, Union @@ -190,6 +191,7 @@ def _get_message_groups( elif message.type == Type.LOG: yield message.log elif message.type == Type.RECORD: + print(f"record! {message.record.data}") current_page_records.append(message.record.data) records_count += 1 schema_inferrer.accumulate(message.record) @@ -211,7 +213,7 @@ def _close_page(current_page_request, current_page_response, current_slice_pages raise ValueError("Every message grouping should have at least one request and response") current_slice_pages.append( - StreamReadPages(request=current_page_request, response=current_page_response, records=current_page_records) + StreamReadPages(request=current_page_request, response=current_page_response, records=deepcopy(current_page_records)) ) current_page_records.clear() diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py index c4f863ab98d3..e3b57c70ddcc 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py @@ -83,28 +83,28 @@ def test_read_stream(): response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}', "http_method": "GET"} expected_schema = {"$schema": "http://json-schema.org/schema#", "properties": {"name": {"type": "string"}}, "type": "object"} expected_pages = [ - StreamReadPages( - request=HttpRequest( - url="https://demonslayers.com/api/v1/hashiras", - parameters={"era": ["taisho"]}, - headers={"Content-Type": "application/json"}, - body={"custom": "field"}, - http_method="GET", - ), - response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'), - records=[{"name": "Shinobu Kocho"}, {"name": "Muichiro Tokito"}], - ), - StreamReadPages( - request=HttpRequest( - url="https://demonslayers.com/api/v1/hashiras", - parameters={"era": ["taisho"]}, - headers={"Content-Type": "application/json"}, - body={"custom": "field"}, - http_method="GET", - ), - response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'), - records=[{"name": "Mitsuri Kanroji"}], - ), + { + "request":{ + "url":"https://demonslayers.com/api/v1/hashiras", + "parameters":{"era": ["taisho"]}, + "headers": {"Content-Type": "application/json"}, + "body":{"custom": "field"}, + "http_method":"GET", + }, + "response":{"status":200, "headers":{"field": "value"}, "body":'{"name": "field"}'}, + "records":[{"name": "Shinobu Kocho"}, {"name": "Muichiro Tokito"}], + }, + { + "request":{ + "url": "https://demonslayers.com/api/v1/hashiras", + "parameters": {"era": ["taisho"]}, + "headers": {"Content-Type": "application/json"}, + "body":{"custom": "field"}, + "http_method":"GET", + }, + "response":{"status": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'}, + "records":[{"name": "Mitsuri Kanroji"}], + }, ] mock_source = make_mock_source( @@ -126,7 +126,6 @@ def test_read_stream(): record = actual_response.record stream_read_object: StreamRead = StreamRead(**record.data) stream_read_object.slices = [StreamReadSlicesInner(**s) for s in stream_read_object.slices] - print(stream_read_object) assert stream_read_object.inferred_schema == expected_schema single_slice = stream_read_object.slices[0] From 0e7d6f4669540c2f33fa776be46b138d90d01384 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Thu, 9 Mar 2023 15:59:53 -0800 Subject: [PATCH 10/71] Second test passes --- .../unit_tests/connector_builder/test_read.py | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py index e3b57c70ddcc..22a0c9cac2cd 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py @@ -132,6 +132,72 @@ def test_read_stream(): for i, actual_page in enumerate(single_slice.pages): assert actual_page == expected_pages[i] +def test_read_stream_with_logs(): + request = { + "url": "https://demonslayers.com/api/v1/hashiras?era=taisho", + "headers": {"Content-Type": "application/json"}, + "body": {"custom": "field"}, + "http_method": "GET", + } + response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'} + expected_pages = [ + { + "request":{ + "url": "https://demonslayers.com/api/v1/hashiras", + "parameters": {"era": ["taisho"]}, + "headers": {"Content-Type": "application/json"}, + "body": {"custom": "field"}, + "http_method": "GET", + }, + "response":{"status": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'}, + "records":[{"name": "Shinobu Kocho"}, {"name": "Muichiro Tokito"}], + }, + { + "request":{ + "url":"https://demonslayers.com/api/v1/hashiras", + "parameters": {"era": ["taisho"]}, + "headers": {"Content-Type": "application/json"}, + "body": {"custom": "field"}, + "http_method": "GET", + }, + "response":{"status":200, "headers":{"field": "value"}, "body": '{"name": "field"}'}, + "records":[{"name": "Mitsuri Kanroji"}], + }, + ] + expected_logs = [ + {"message": "log message before the request"}, + {"message": "log message during the page"}, + {"message": "log message after the response"}, + ] + + mock_source = make_mock_source( + iter( + [ + AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message="log message before the request")), + request_log_message(request), + response_log_message(response), + record_message("hashiras", {"name": "Shinobu Kocho"}), + AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message="log message during the page")), + record_message("hashiras", {"name": "Muichiro Tokito"}), + AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message="log message after the response")), + ] + ) + ) + + connector_builder_handler = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES) + + actual_response: AirbyteMessage = connector_builder_handler.read_stream(source=mock_source, config=CONFIG, stream="hashiras") + record = actual_response.record + stream_read_object: StreamRead = StreamRead(**record.data) + stream_read_object.slices = [StreamReadSlicesInner(**s) for s in stream_read_object.slices] + + single_slice = stream_read_object.slices[0] + for i, actual_page in enumerate(single_slice.pages): + assert actual_page == expected_pages[i] + + for i, actual_log in enumerate(stream_read_object.logs): + assert actual_log == expected_logs[i] + def make_mock_source(return_value: Iterator) -> MagicMock: mock_source = MagicMock() mock_source.read.return_value = return_value From 81ff6e9091369812a205dbc77e47b92365443b3f Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Thu, 9 Mar 2023 16:03:16 -0800 Subject: [PATCH 11/71] 3rd test passes --- .../unit_tests/connector_builder/test_read.py | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py index 22a0c9cac2cd..dffc6004b603 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py @@ -198,6 +198,48 @@ def test_read_stream_with_logs(): for i, actual_log in enumerate(stream_read_object.logs): assert actual_log == expected_logs[i] +@pytest.mark.parametrize( + "request_record_limit, max_record_limit", + [ + pytest.param(1, 3, id="test_create_request_with_record_limit"), + pytest.param(3, 1, id="test_create_request_record_limit_exceeds_max"), + ], +) +def test_read_stream_record_limit(request_record_limit, max_record_limit): + request = { + "url": "https://demonslayers.com/api/v1/hashiras?era=taisho", + "headers": {"Content-Type": "application/json"}, + "body": {"custom": "field"}, + } + response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'} + mock_source = make_mock_source( + iter( + [ + request_log_message(request), + response_log_message(response), + record_message("hashiras", {"name": "Shinobu Kocho"}), + record_message("hashiras", {"name": "Muichiro Tokito"}), + request_log_message(request), + response_log_message(response), + record_message("hashiras", {"name": "Mitsuri Kanroji"}), + response_log_message(response), + ] + ) + ) + n_records = 2 + record_limit = min(request_record_limit, max_record_limit) + + api = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES, max_record_limit=max_record_limit) + actual_response: AirbyteMessage = api.read_stream(mock_source, config=CONFIG, stream="hashiras", record_limit=request_record_limit) + record = actual_response.record + stream_read_object: StreamRead = StreamRead(**record.data) + stream_read_object.slices = [StreamReadSlicesInner(**s) for s in stream_read_object.slices] + single_slice = stream_read_object.slices[0] + total_records = 0 + for i, actual_page in enumerate(single_slice.pages): + total_records += len(actual_page["records"]) + assert total_records == min([record_limit, n_records]) + def make_mock_source(return_value: Iterator) -> MagicMock: mock_source = MagicMock() mock_source.read.return_value = return_value From 14aa8ca594a2229d98d8d7b512f36ccf4e38fa54 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Thu, 9 Mar 2023 16:04:42 -0800 Subject: [PATCH 12/71] one more test --- .../unit_tests/connector_builder/test_read.py | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py index dffc6004b603..9edfac543d52 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py @@ -240,6 +240,47 @@ def test_read_stream_record_limit(request_record_limit, max_record_limit): total_records += len(actual_page["records"]) assert total_records == min([record_limit, n_records]) +@pytest.mark.parametrize( + "max_record_limit", + [ + pytest.param(2, id="test_create_request_no_record_limit"), + pytest.param(1, id="test_create_request_no_record_limit_n_records_exceed_max"), + ], +) +def test_read_stream_default_record_limit(max_record_limit): + request = { + "url": "https://demonslayers.com/api/v1/hashiras?era=taisho", + "headers": {"Content-Type": "application/json"}, + "body": {"custom": "field"}, + } + response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'} + mock_source = make_mock_source( + iter( + [ + request_log_message(request), + response_log_message(response), + record_message("hashiras", {"name": "Shinobu Kocho"}), + record_message("hashiras", {"name": "Muichiro Tokito"}), + request_log_message(request), + response_log_message(response), + record_message("hashiras", {"name": "Mitsuri Kanroji"}), + response_log_message(response), + ] + ) + ) + n_records = 2 + + api = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES, max_record_limit=max_record_limit) + actual_response: AirbyteMessage = api.read_stream(source=mock_source, config=CONFIG, stream="hashiras") + record = actual_response.record + stream_read_object: StreamRead = StreamRead(**record.data) + stream_read_object.slices = [StreamReadSlicesInner(**s) for s in stream_read_object.slices] + single_slice = stream_read_object.slices[0] + total_records = 0 + for i, actual_page in enumerate(single_slice.pages): + total_records += len(actual_page["records"]) + assert total_records == min([max_record_limit, n_records]) + def make_mock_source(return_value: Iterator) -> MagicMock: mock_source = MagicMock() mock_source.read.return_value = return_value From b3764ba581bf4672e479cb9f15b8914b4db6c8ec Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Thu, 9 Mar 2023 16:12:22 -0800 Subject: [PATCH 13/71] another test --- .../connector_builder_handler.py | 12 +++++++-- .../unit_tests/connector_builder/test_read.py | 26 +++++++++++++++++++ 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index 86a98969c27e..c337d04cb4ad 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -75,6 +75,12 @@ class StreamReadRequestBody: state: Optional[Dict[str, Any]] record_limit: Optional[int] + def __post_init__(self): + print(self.record_limit) + raise ValueError("here") + if not (1 <= self.record_limit <= 1000): + raise ValueError("") #FIXME + #FIXME: can dataclasses also have validators? """ @validator("record_limit") @@ -116,6 +122,8 @@ def read_stream( stream: str, record_limit: Optional[int] = None, ) -> AirbyteMessage: + if record_limit is not None and not (1 <= record_limit <= 1000): + raise ValueError("") schema_inferrer = SchemaInferrer() if record_limit is None: @@ -209,8 +217,8 @@ def _need_to_close_page(at_least_one_page_in_group, message): @staticmethod def _close_page(current_page_request, current_page_response, current_slice_pages, current_page_records): - if not current_page_request or not current_page_response: - raise ValueError("Every message grouping should have at least one request and response") + #if not current_page_request or not current_page_response: + # raise ValueError("Every message grouping should have at least one request and response") current_slice_pages.append( StreamReadPages(request=current_page_request, response=current_page_response, records=deepcopy(current_page_records)) diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py index 9edfac543d52..23b9268e3b71 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py @@ -281,6 +281,32 @@ def test_read_stream_default_record_limit(max_record_limit): total_records += len(actual_page["records"]) assert total_records == min([max_record_limit, n_records]) +def test_read_stream_limit_0(): + request = { + "url": "https://demonslayers.com/api/v1/hashiras?era=taisho", + "headers": {"Content-Type": "application/json"}, + "body": {"custom": "field"}, + } + response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'} + mock_source = make_mock_source( + iter( + [ + request_log_message(request), + response_log_message(response), + record_message("hashiras", {"name": "Shinobu Kocho"}), + record_message("hashiras", {"name": "Muichiro Tokito"}), + request_log_message(request), + response_log_message(response), + record_message("hashiras", {"name": "Mitsuri Kanroji"}), + response_log_message(response), + ] + ) + ) + api = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES) + + with pytest.raises(ValueError): + api.read_stream(source=mock_source, config=CONFIG, stream="hashiras", record_limit=0) + def make_mock_source(return_value: Iterator) -> MagicMock: mock_source = MagicMock() mock_source.read.return_value = return_value From c6040cc4bc4e508c2ca309312609831d7ff8391d Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Thu, 9 Mar 2023 16:16:42 -0800 Subject: [PATCH 14/71] one more test --- .../unit_tests/connector_builder/test_read.py | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py index 23b9268e3b71..786df6ecfb1b 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py @@ -307,6 +307,61 @@ def test_read_stream_limit_0(): with pytest.raises(ValueError): api.read_stream(source=mock_source, config=CONFIG, stream="hashiras", record_limit=0) +def test_read_stream_no_records(): + request = { + "url": "https://demonslayers.com/api/v1/hashiras?era=taisho", + "headers": {"Content-Type": "application/json"}, + "body": {"custom": "field"}, + "http_method": "GET", + } + response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'} + expected_pages = [ + { + "request":{ + "url":"https://demonslayers.com/api/v1/hashiras", + "parameters": {"era": ["taisho"]}, + "headers": {"Content-Type": "application/json"}, + "body": {"custom": "field"}, + "http_method": "GET", + }, + "response":{"status": 200, "headers": {"field": "value"}, "body":'{"name": "field"}'}, + "records":[], + }, + { + "request":{ + "url": "https://demonslayers.com/api/v1/hashiras", + "parameters": {"era": ["taisho"]}, + "headers": {"Content-Type": "application/json"}, + "body": {"custom": "field"}, + "http_method": "GET", + }, + "response": {"status":200, "headers":{"field": "value"}, "body": '{"name": "field"}'}, + "records": [], + }, + ] + + mock_source = make_mock_source( + iter( + [ + request_log_message(request), + response_log_message(response), + request_log_message(request), + response_log_message(response), + ] + ) + ) + + api = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES) + + actual_response: AirbyteMessage = api.read_stream(source=mock_source, config=CONFIG, stream="hashiras") + record = actual_response.record + stream_read_object: StreamRead = StreamRead(**record.data) + stream_read_object.slices = [StreamReadSlicesInner(**s) for s in stream_read_object.slices] + + single_slice = stream_read_object.slices[0] + for i, actual_page in enumerate(single_slice.pages): + assert actual_page == expected_pages[i] + def make_mock_source(return_value: Iterator) -> MagicMock: mock_source = MagicMock() mock_source.read.return_value = return_value From 5f0ead197dbb655c64d45d9fd02a8ea214a0972a Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Thu, 9 Mar 2023 16:24:06 -0800 Subject: [PATCH 15/71] test --- .../connector_builder_handler.py | 4 ++-- .../unit_tests/connector_builder/test_read.py | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index c337d04cb4ad..500f0d639a8b 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -217,8 +217,8 @@ def _need_to_close_page(at_least_one_page_in_group, message): @staticmethod def _close_page(current_page_request, current_page_response, current_slice_pages, current_page_records): - #if not current_page_request or not current_page_response: - # raise ValueError("Every message grouping should have at least one request and response") + if not current_page_request or not current_page_response: + raise ValueError("Every message grouping should have at least one request and response") current_slice_pages.append( StreamReadPages(request=current_page_request, response=current_page_response, records=deepcopy(current_page_records)) diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py index 786df6ecfb1b..8a7c13fbf452 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py @@ -362,6 +362,24 @@ def test_read_stream_no_records(): for i, actual_page in enumerate(single_slice.pages): assert actual_page == expected_pages[i] +def test_read_stream_invalid_group_format(): + response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'} + + mock_source = make_mock_source( + iter( + [ + response_log_message(response), + record_message("hashiras", {"name": "Shinobu Kocho"}), + record_message("hashiras", {"name": "Muichiro Tokito"}), + ] + ) + ) + + api = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES) + + with pytest.raises(ValueError) as actual_exception: + api.read_stream(source=mock_source, config=CONFIG, stream="hashiras") + def make_mock_source(return_value: Iterator) -> MagicMock: mock_source = MagicMock() mock_source.read.return_value = return_value From 1614badd9ca6f30a3340cdbb8f48f89a9578cf9b Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Thu, 9 Mar 2023 16:38:22 -0800 Subject: [PATCH 16/71] return StreamRead --- .../connector_builder_handler.py | 11 +- .../unit_tests/connector_builder/test_read.py | 172 ++++++++---------- 2 files changed, 82 insertions(+), 101 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index 500f0d639a8b..3efb73f3d884 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -121,7 +121,7 @@ def read_stream( config: Dict[str, Any], stream: str, record_limit: Optional[int] = None, - ) -> AirbyteMessage: + ) -> StreamRead: if record_limit is not None and not (1 <= record_limit <= 1000): raise ValueError("") schema_inferrer = SchemaInferrer() @@ -145,15 +145,12 @@ def read_stream( else: slices.append(message_group) - return AirbyteMessage(type=Type.RECORD, record=AirbyteRecordMessage( - stream="_test_read", - emitted_at=_emitted_at(), - data=asdict(StreamRead( + return StreamRead( logs=log_messages, slices=slices, test_read_limit_reached=self._has_reached_limit(slices), - inferred_schema=schema_inferrer.get_stream_schema(stream), - )))) + inferred_schema=schema_inferrer.get_stream_schema(stream) + ) def _get_message_groups( self, messages: Iterator[AirbyteMessage], schema_inferrer: SchemaInferrer, limit: int diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py index 8a7c13fbf452..936985594af6 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py @@ -83,28 +83,28 @@ def test_read_stream(): response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}', "http_method": "GET"} expected_schema = {"$schema": "http://json-schema.org/schema#", "properties": {"name": {"type": "string"}}, "type": "object"} expected_pages = [ - { - "request":{ - "url":"https://demonslayers.com/api/v1/hashiras", - "parameters":{"era": ["taisho"]}, - "headers": {"Content-Type": "application/json"}, - "body":{"custom": "field"}, - "http_method":"GET", - }, - "response":{"status":200, "headers":{"field": "value"}, "body":'{"name": "field"}'}, - "records":[{"name": "Shinobu Kocho"}, {"name": "Muichiro Tokito"}], - }, - { - "request":{ - "url": "https://demonslayers.com/api/v1/hashiras", - "parameters": {"era": ["taisho"]}, - "headers": {"Content-Type": "application/json"}, - "body":{"custom": "field"}, - "http_method":"GET", - }, - "response":{"status": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'}, - "records":[{"name": "Mitsuri Kanroji"}], - }, + StreamReadPages( + request=HttpRequest( + url="https://demonslayers.com/api/v1/hashiras", + parameters={"era": ["taisho"]}, + headers={"Content-Type": "application/json"}, + body={"custom": "field"}, + http_method="GET", + ), + response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'), + records=[{"name": "Shinobu Kocho"}, {"name": "Muichiro Tokito"}], + ), + StreamReadPages( + request=HttpRequest( + url="https://demonslayers.com/api/v1/hashiras", + parameters={"era": ["taisho"]}, + headers={"Content-Type": "application/json"}, + body={"custom": "field"}, + http_method="GET", + ), + response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'), + records=[{"name": "Mitsuri Kanroji"}], + ), ] mock_source = make_mock_source( @@ -122,13 +122,10 @@ def test_read_stream(): ) connector_builder_handler = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES) - actual_response: AirbyteMessage = connector_builder_handler.read_stream(source=mock_source, config=CONFIG, stream="hashiras") - record = actual_response.record - stream_read_object: StreamRead = StreamRead(**record.data) - stream_read_object.slices = [StreamReadSlicesInner(**s) for s in stream_read_object.slices] - assert stream_read_object.inferred_schema == expected_schema + actual_response: StreamRead = connector_builder_handler.read_stream(source=mock_source, config=CONFIG, stream="hashiras") + assert actual_response.inferred_schema == expected_schema - single_slice = stream_read_object.slices[0] + single_slice = actual_response.slices[0] for i, actual_page in enumerate(single_slice.pages): assert actual_page == expected_pages[i] @@ -141,28 +138,28 @@ def test_read_stream_with_logs(): } response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'} expected_pages = [ - { - "request":{ - "url": "https://demonslayers.com/api/v1/hashiras", - "parameters": {"era": ["taisho"]}, - "headers": {"Content-Type": "application/json"}, - "body": {"custom": "field"}, - "http_method": "GET", - }, - "response":{"status": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'}, - "records":[{"name": "Shinobu Kocho"}, {"name": "Muichiro Tokito"}], - }, - { - "request":{ - "url":"https://demonslayers.com/api/v1/hashiras", - "parameters": {"era": ["taisho"]}, - "headers": {"Content-Type": "application/json"}, - "body": {"custom": "field"}, - "http_method": "GET", - }, - "response":{"status":200, "headers":{"field": "value"}, "body": '{"name": "field"}'}, - "records":[{"name": "Mitsuri Kanroji"}], - }, + StreamReadPages( + request=HttpRequest( + url="https://demonslayers.com/api/v1/hashiras", + parameters={"era": ["taisho"]}, + headers={"Content-Type": "application/json"}, + body={"custom": "field"}, + http_method="GET", + ), + response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'), + records=[{"name": "Shinobu Kocho"}, {"name": "Muichiro Tokito"}], + ), + StreamReadPages( + request=HttpRequest( + url="https://demonslayers.com/api/v1/hashiras", + parameters={"era": ["taisho"]}, + headers={"Content-Type": "application/json"}, + body={"custom": "field"}, + http_method="GET", + ), + response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'), + records=[{"name": "Mitsuri Kanroji"}], + ), ] expected_logs = [ {"message": "log message before the request"}, @@ -187,15 +184,11 @@ def test_read_stream_with_logs(): connector_builder_handler = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES) actual_response: AirbyteMessage = connector_builder_handler.read_stream(source=mock_source, config=CONFIG, stream="hashiras") - record = actual_response.record - stream_read_object: StreamRead = StreamRead(**record.data) - stream_read_object.slices = [StreamReadSlicesInner(**s) for s in stream_read_object.slices] - - single_slice = stream_read_object.slices[0] + single_slice = actual_response.slices[0] for i, actual_page in enumerate(single_slice.pages): assert actual_page == expected_pages[i] - for i, actual_log in enumerate(stream_read_object.logs): + for i, actual_log in enumerate(actual_response.logs): assert actual_log == expected_logs[i] @pytest.mark.parametrize( @@ -230,14 +223,11 @@ def test_read_stream_record_limit(request_record_limit, max_record_limit): record_limit = min(request_record_limit, max_record_limit) api = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES, max_record_limit=max_record_limit) - actual_response: AirbyteMessage = api.read_stream(mock_source, config=CONFIG, stream="hashiras", record_limit=request_record_limit) - record = actual_response.record - stream_read_object: StreamRead = StreamRead(**record.data) - stream_read_object.slices = [StreamReadSlicesInner(**s) for s in stream_read_object.slices] - single_slice = stream_read_object.slices[0] + actual_response: StreamRead = api.read_stream(mock_source, config=CONFIG, stream="hashiras", record_limit=request_record_limit) + single_slice = actual_response.slices[0] total_records = 0 for i, actual_page in enumerate(single_slice.pages): - total_records += len(actual_page["records"]) + total_records += len(actual_page.records) assert total_records == min([record_limit, n_records]) @pytest.mark.parametrize( @@ -271,14 +261,11 @@ def test_read_stream_default_record_limit(max_record_limit): n_records = 2 api = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES, max_record_limit=max_record_limit) - actual_response: AirbyteMessage = api.read_stream(source=mock_source, config=CONFIG, stream="hashiras") - record = actual_response.record - stream_read_object: StreamRead = StreamRead(**record.data) - stream_read_object.slices = [StreamReadSlicesInner(**s) for s in stream_read_object.slices] - single_slice = stream_read_object.slices[0] + actual_response: StreamRead = api.read_stream(source=mock_source, config=CONFIG, stream="hashiras") + single_slice = actual_response.slices[0] total_records = 0 for i, actual_page in enumerate(single_slice.pages): - total_records += len(actual_page["records"]) + total_records += len(actual_page.records) assert total_records == min([max_record_limit, n_records]) def test_read_stream_limit_0(): @@ -316,28 +303,28 @@ def test_read_stream_no_records(): } response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'} expected_pages = [ - { - "request":{ - "url":"https://demonslayers.com/api/v1/hashiras", - "parameters": {"era": ["taisho"]}, - "headers": {"Content-Type": "application/json"}, - "body": {"custom": "field"}, - "http_method": "GET", - }, - "response":{"status": 200, "headers": {"field": "value"}, "body":'{"name": "field"}'}, - "records":[], - }, - { - "request":{ - "url": "https://demonslayers.com/api/v1/hashiras", - "parameters": {"era": ["taisho"]}, - "headers": {"Content-Type": "application/json"}, - "body": {"custom": "field"}, - "http_method": "GET", - }, - "response": {"status":200, "headers":{"field": "value"}, "body": '{"name": "field"}'}, - "records": [], - }, + StreamReadPages( + request=HttpRequest( + url="https://demonslayers.com/api/v1/hashiras", + parameters={"era": ["taisho"]}, + headers={"Content-Type": "application/json"}, + body={"custom": "field"}, + http_method="GET", + ), + response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'), + records=[], + ), + StreamReadPages( + request=HttpRequest( + url="https://demonslayers.com/api/v1/hashiras", + parameters={"era": ["taisho"]}, + headers={"Content-Type": "application/json"}, + body={"custom": "field"}, + http_method="GET", + ), + response=HttpResponse(status=200, headers={"field": "value"}, body='{"name": "field"}'), + records=[], + ), ] mock_source = make_mock_source( @@ -354,11 +341,8 @@ def test_read_stream_no_records(): api = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES) actual_response: AirbyteMessage = api.read_stream(source=mock_source, config=CONFIG, stream="hashiras") - record = actual_response.record - stream_read_object: StreamRead = StreamRead(**record.data) - stream_read_object.slices = [StreamReadSlicesInner(**s) for s in stream_read_object.slices] - single_slice = stream_read_object.slices[0] + single_slice = actual_response.slices[0] for i, actual_page in enumerate(single_slice.pages): assert actual_page == expected_pages[i] From b323578b6943903964c91583a4f600c9e333958a Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Thu, 9 Mar 2023 16:40:44 -0800 Subject: [PATCH 17/71] test --- .../connector_builder_handler.py | 8 +-- .../unit_tests/connector_builder/test_read.py | 52 +++++++++++++++++++ 2 files changed, 56 insertions(+), 4 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index 3efb73f3d884..26c6357132ef 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -21,13 +21,12 @@ import logging from airbyte_protocol.models.airbyte_protocol import ConfiguredAirbyteCatalog, ConfiguredAirbyteStream, SyncMode, DestinationSyncMode -logger = logging.getLogger("airbyte.connector-builder") @dataclass class HttpResponse: status: int - body: Optional[str] - headers: Optional[Dict[str, Any]] + body: Optional[str] = None + headers: Optional[Dict[str, Any]] = None @dataclass class HttpRequest: @@ -111,6 +110,7 @@ def list_streams() -> AirbyteRecordMessage: class ConnectorBuilderHandler: + logger = logging.getLogger("airbyte.connector-builder") def __init__(self, max_pages_per_slice: int, max_slices: int, max_record_limit: int = 1000): self._max_pages_per_slice = max_pages_per_slice self._max_slices = max_slices @@ -136,7 +136,7 @@ def read_stream( state = {} # No support for incremental sync catalog = _create_configure_catalog(stream) for message_group in self._get_message_groups( - source.read(logger, config, catalog, state), + source.read(self.logger, config, catalog, state), schema_inferrer, record_limit, ): diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py index 936985594af6..f88914c53852 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py @@ -364,6 +364,58 @@ def test_read_stream_invalid_group_format(): with pytest.raises(ValueError) as actual_exception: api.read_stream(source=mock_source, config=CONFIG, stream="hashiras") +@pytest.mark.parametrize( + "log_message, expected_response", + [ + pytest.param( + {"status_code": 200, "headers": {"field": "name"}, "body": '{"id": "fire", "owner": "kyojuro_rengoku"}'}, + HttpResponse(status=200, headers={"field": "name"}, body='{"id": "fire", "owner": "kyojuro_rengoku"}'), + id="test_create_response_with_all_fields", + ), + pytest.param( + {"status_code": 200, "headers": {"field": "name"}}, + HttpResponse(status=200, headers={"field": "name"}, body="{}"), + id="test_create_response_with_no_body", + ), + pytest.param( + {"status_code": 200, "body": '{"id": "fire", "owner": "kyojuro_rengoku"}'}, + HttpResponse(status=200, body='{"id": "fire", "owner": "kyojuro_rengoku"}'), + id="test_create_response_with_no_headers", + ), + pytest.param( + { + "status_code": 200, + "headers": {"field": "name"}, + "body": '[{"id": "fire", "owner": "kyojuro_rengoku"}, {"id": "mist", "owner": "muichiro_tokito"}]', + }, + HttpResponse( + status=200, + headers={"field": "name"}, + body='[{"id": "fire", "owner": "kyojuro_rengoku"}, {"id": "mist", "owner": "muichiro_tokito"}]', + ), + id="test_create_response_with_array", + ), + pytest.param( + {"status_code": 200, "body": "tomioka"}, + HttpResponse(status=200, body="tomioka"), + id="test_create_response_with_string", + ), + pytest.param("request:{invalid_json: }", None, id="test_invalid_json_still_does_not_crash"), + pytest.param("just a regular log message", None, id="test_no_response:_prefix_does_not_crash"), + ], +) +def test_create_response_from_log_message(log_message, expected_response): + if isinstance(log_message, str): + response_message = log_message + else: + response_message = f"response:{json.dumps(log_message)}" + + airbyte_log_message = AirbyteLogMessage(level=Level.INFO, message=response_message) + connector_builder_handler = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES) + actual_response = connector_builder_handler._create_response_from_log_message(airbyte_log_message) + + assert actual_response == expected_response + def make_mock_source(return_value: Iterator) -> MagicMock: mock_source = MagicMock() mock_source.read.return_value = return_value From 9902dd5d0cce9770e5a18c385760b6066b42b2d8 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Thu, 9 Mar 2023 16:43:43 -0800 Subject: [PATCH 18/71] test --- .../unit_tests/connector_builder/test_read.py | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py index f88914c53852..466137fbd546 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py @@ -416,6 +416,47 @@ def test_create_response_from_log_message(log_message, expected_response): assert actual_response == expected_response +def test_read_stream_with_many_slices(): + request = {} + response = {"status_code": 200} + + mock_source = make_mock_source( + iter( + [ + slice_message(), + request_log_message(request), + response_log_message(response), + record_message("hashiras", {"name": "Muichiro Tokito"}), + slice_message(), + request_log_message(request), + response_log_message(response), + record_message("hashiras", {"name": "Shinobu Kocho"}), + record_message("hashiras", {"name": "Mitsuri Kanroji"}), + request_log_message(request), + response_log_message(response), + record_message("hashiras", {"name": "Obanai Iguro"}), + request_log_message(request), + response_log_message(response), + ] + ) + ) + + api = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES) + + loop = asyncio.get_event_loop() + stream_read: StreamRead = api.read_stream(source=mock_source, config=CONFIG, stream="hashiras") + + assert not stream_read.test_read_limit_reached + assert len(stream_read.slices) == 2 + + assert len(stream_read.slices[0].pages) == 1 + assert len(stream_read.slices[0].pages[0].records) == 1 + + assert len(stream_read.slices[1].pages) == 3 + assert len(stream_read.slices[1].pages[0].records) == 2 + assert len(stream_read.slices[1].pages[1].records) == 1 + assert len(stream_read.slices[1].pages[2].records) == 0 + def make_mock_source(return_value: Iterator) -> MagicMock: mock_source = MagicMock() mock_source.read.return_value = return_value From 3b3255e957018d8576b417489ec63c59d60abfe9 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Thu, 9 Mar 2023 16:44:02 -0800 Subject: [PATCH 19/71] rename --- airbyte-cdk/python/unit_tests/connector_builder/test_read.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py index 466137fbd546..8aa5b81404a3 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py @@ -441,10 +441,9 @@ def test_read_stream_with_many_slices(): ) ) - api = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES) + connecto_builder_handler = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES) - loop = asyncio.get_event_loop() - stream_read: StreamRead = api.read_stream(source=mock_source, config=CONFIG, stream="hashiras") + stream_read: StreamRead = connecto_builder_handler.read_stream(source=mock_source, config=CONFIG, stream="hashiras") assert not stream_read.test_read_limit_reached assert len(stream_read.slices) == 2 From 6242e3a3581f4fc93be456bad9eba67af8bcc4af Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Thu, 9 Mar 2023 16:45:43 -0800 Subject: [PATCH 20/71] test --- .../unit_tests/connector_builder/test_read.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py index 8aa5b81404a3..2f6e49c9253a 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py @@ -456,6 +456,20 @@ def test_read_stream_with_many_slices(): assert len(stream_read.slices[1].pages[1].records) == 1 assert len(stream_read.slices[1].pages[2].records) == 0 +def test_read_stream_given_maximum_number_of_slices_then_test_read_limit_reached(): + maximum_number_of_slices = 5 + request = {} + response = {"status_code": 200} + mock_source = make_mock_source( + iter([slice_message(), request_log_message(request), response_log_message(response)] * maximum_number_of_slices) + ) + + api = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES) + + stream_read: StreamRead = api.read_stream(source=mock_source, config=CONFIG, stream="hashiras") + + assert stream_read.test_read_limit_reached + def make_mock_source(return_value: Iterator) -> MagicMock: mock_source = MagicMock() mock_source.read.return_value = return_value From c8631f198e66b5444c3fba1b54da08d8268f0174 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Thu, 9 Mar 2023 16:47:05 -0800 Subject: [PATCH 21/71] test --- .../unit_tests/connector_builder/test_read.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py index 2f6e49c9253a..20638f85c4e3 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py @@ -470,6 +470,20 @@ def test_read_stream_given_maximum_number_of_slices_then_test_read_limit_reached assert stream_read.test_read_limit_reached +def test_read_stream_given_maximum_number_of_pages_then_test_read_limit_reached(): + maximum_number_of_pages_per_slice = 5 + request = {} + response = {"status_code": 200} + mock_source = make_mock_source( + iter([slice_message()] + [request_log_message(request), response_log_message(response)] * maximum_number_of_pages_per_slice) + ) + + api = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES) + + stream_read: StreamRead = api.read_stream(source=mock_source, config=CONFIG, stream="hashiras") + + assert stream_read.test_read_limit_reached + def make_mock_source(return_value: Iterator) -> MagicMock: mock_source = MagicMock() mock_source.read.return_value = return_value From fe1da29359be14f76cf2787421538226fc713108 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Thu, 9 Mar 2023 16:47:43 -0800 Subject: [PATCH 22/71] test --- airbyte-cdk/python/unit_tests/connector_builder/test_read.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py index 20638f85c4e3..303e08d2ff2c 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_read.py @@ -2,14 +2,11 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -import asyncio -import json from unittest.mock import MagicMock import pytest -from pydantic.error_wrappers import ValidationError -from airbyte_cdk.models import Level, Type +from airbyte_cdk.models import Level from connector_builder.connector_builder_handler import * MAX_PAGES_PER_SLICE = 4 From 5b0750c3a49f9273e249884a5367bfbb8e6d3663 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Thu, 9 Mar 2023 17:19:41 -0800 Subject: [PATCH 23/71] main seems to work --- .../connector_builder_handler.py | 6 ++-- .../source_declarative_manifest/main.py | 35 +++++++++++++------ 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index 26c6357132ef..d97a77ddb5ab 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -13,7 +13,7 @@ from copy import deepcopy import json from json import JSONDecodeError -from typing import Any, Dict, Iterable, Iterator, Optional, Union +from typing import Any, Dict, Iterable, Iterator, Mapping, Optional, Union from urllib.parse import parse_qs, urlparse from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Type @@ -75,7 +75,6 @@ class StreamReadRequestBody: record_limit: Optional[int] def __post_init__(self): - print(self.record_limit) raise ValueError("here") if not (1 <= self.record_limit <= 1000): raise ValueError("") #FIXME @@ -118,7 +117,7 @@ def __init__(self, max_pages_per_slice: int, max_slices: int, max_record_limit: def read_stream( self, source: DeclarativeSource, - config: Dict[str, Any], + config: Mapping[str, Any], stream: str, record_limit: Optional[int] = None, ) -> StreamRead: @@ -196,7 +195,6 @@ def _get_message_groups( elif message.type == Type.LOG: yield message.log elif message.type == Type.RECORD: - print(f"record! {message.record.data}") current_page_records.append(message.record.data) records_count += 1 schema_inferrer.accumulate(message.record) diff --git a/airbyte-cdk/python/source_declarative_manifest/main.py b/airbyte-cdk/python/source_declarative_manifest/main.py index d8db69c2c2a0..edf85a7b25e8 100644 --- a/airbyte-cdk/python/source_declarative_manifest/main.py +++ b/airbyte-cdk/python/source_declarative_manifest/main.py @@ -4,9 +4,13 @@ import argparse +import dataclasses import sys from typing import Any, List, Mapping, Tuple +from airbyte_cdk.models import AirbyteRecordMessage, AirbyteMessage, Level, SyncMode + +from airbyte_cdk.models import Type as MessageType from airbyte_cdk.connector import BaseConnector from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource @@ -15,10 +19,12 @@ from airbyte_protocol.models.airbyte_protocol import ConfiguredAirbyteCatalog, ConfiguredAirbyteStream, SyncMode, DestinationSyncMode import logging +from connector_builder.connector_builder_handler import _emitted_at + -def create_source(config: Mapping[str, Any]) -> DeclarativeSource: +def create_source(config: Mapping[str, Any], debug) -> DeclarativeSource: manifest = config.get("__injected_declarative_manifest") - return ManifestDeclarativeSource(manifest) + return ManifestDeclarativeSource(manifest, debug) def get_config_from_args(args: List[str]) -> Mapping[str, Any]: @@ -44,22 +50,28 @@ def preparse(args: List[str]) -> Tuple[str, str, str]: return parsed.command, parsed.config -def execute_command(source: DeclarativeSource, config: Mapping[str, Any]): +def execute_command(source: DeclarativeSource, config: Mapping[str, Any]) -> AirbyteMessage: command = config.get("__command") command_config = config.get("__command_config") if command == "resolve_manifest": return connector_builder_handler.resolve_manifest(source) elif command == "read": stream_name = command_config["stream_name"] - configured_catalog = create_configure_catalog(stream_name) - logger = logging.getLogger(f"airbyte.{source.name}") - return source.read(logger, config, configured_catalog, None) + max_pages_per_slice = command_config["max_pages_per_slice"] + max_slices = command_config["max_slices"] + max_record_limit = command_config["max_records"] + handler = connector_builder_handler.ConnectorBuilderHandler(max_pages_per_slice, max_slices) + stream_read = handler.read_stream(source, config, stream_name, max_record_limit) + return AirbyteMessage(type=MessageType.RECORD, record=AirbyteRecordMessage( + data=dataclasses.asdict(stream_read), + stream="_test_read", + emitted_at=_emitted_at() #FIXME need to move to connector_builder_handler + )) raise ValueError(f"Unrecognized command {command}.") def handle_connector_builder_request(source: DeclarativeSource, config: Mapping[str, Any]): - messages = execute_command(source, config) - for message in messages: - print(message.json(exclude_unset=True)) + message = execute_command(source, config) + print(message.json(exclude_unset=True)) def handle_connector_request(source: DeclarativeSource, args: List[str]): @@ -70,8 +82,9 @@ def handle_connector_request(source: DeclarativeSource, args: List[str]): def handle_request(args: List[str]): config = get_config_from_args(args) - source = create_source(config) - if "__command" in config: + is_connector_builder_request = "__command" in config + source = create_source(config, is_connector_builder_request) + if is_connector_builder_request: handle_connector_builder_request(source, config) else: handle_connector_request(source, args) From 7dd5ada226df53e53dfb2ae49f7ac2da802d1351 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Thu, 9 Mar 2023 17:33:18 -0800 Subject: [PATCH 24/71] Update --- .../connector_builder_handler.py | 253 ---------------- .../connector_builder/message_grouper.py | 273 +++++++++++++++++- .../source_declarative_manifest/main.py | 7 +- .../{test_read.py => test_message_grouper.py} | 64 ++-- 4 files changed, 306 insertions(+), 291 deletions(-) rename airbyte-cdk/python/unit_tests/connector_builder/{test_read.py => test_message_grouper.py} (87%) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index d97a77ddb5ab..3e88494313fe 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -22,248 +22,11 @@ from airbyte_protocol.models.airbyte_protocol import ConfiguredAirbyteCatalog, ConfiguredAirbyteStream, SyncMode, DestinationSyncMode -@dataclass -class HttpResponse: - status: int - body: Optional[str] = None - headers: Optional[Dict[str, Any]] = None - -@dataclass -class HttpRequest: - url: str - parameters: Optional[Dict[str, Any]] - body: Optional[Dict[str, Any]] - headers: Optional[Dict[str, Any]] - http_method: str -@dataclass -class StreamReadPages: - records: List[object] - request: Optional[HttpRequest] = None - response: Optional[HttpResponse] = None - -@dataclass -class StreamReadSlicesInnerPagesInner: - - records: List[object] - request: Optional[HttpRequest] - response: Optional[HttpResponse] - -@dataclass -class StreamReadSlicesInnerSliceDescriptor: - start_datetime: Optional[datetime] - list_item: Optional[str] - -@dataclass -class StreamReadSlicesInner: - pages: List[StreamReadSlicesInnerPagesInner] - slice_descriptor: Optional[StreamReadSlicesInnerSliceDescriptor] - state: Optional[Dict[str, Any]] - -@dataclass -class StreamRead(object): - logs: List[object] - slices: List[StreamReadSlicesInner] - test_read_limit_reached: bool - inferred_schema: Optional[Dict[str, Any]] - -@dataclass -class StreamReadRequestBody: - manifest: Dict[str, Any] - stream: str - config: Dict[str, Any] - state: Optional[Dict[str, Any]] - record_limit: Optional[int] - - def __post_init__(self): - raise ValueError("here") - if not (1 <= self.record_limit <= 1000): - raise ValueError("") #FIXME - -#FIXME: can dataclasses also have validators? -""" - @validator("record_limit") - def record_limit_max(cls, value): - assert value <= 1000 - return value - - @validator("record_limit") - def record_limit_min(cls, value): - assert value >= 1 - return value -""" - -@dataclass -class StreamReadSliceDescriptor: - start_datetime: Optional[datetime] = None - list_item: Optional[str] = None - -@dataclass -class StreamReadSlices: - pages: List[StreamReadPages] - slice_descriptor: Optional[StreamReadSliceDescriptor] = None - state: Optional[Dict[str, Any]] = None def list_streams() -> AirbyteRecordMessage: raise NotImplementedError - -class ConnectorBuilderHandler: - logger = logging.getLogger("airbyte.connector-builder") - def __init__(self, max_pages_per_slice: int, max_slices: int, max_record_limit: int = 1000): - self._max_pages_per_slice = max_pages_per_slice - self._max_slices = max_slices - self.max_record_limit = max_record_limit - def read_stream( - self, - source: DeclarativeSource, - config: Mapping[str, Any], - stream: str, - record_limit: Optional[int] = None, - ) -> StreamRead: - if record_limit is not None and not (1 <= record_limit <= 1000): - raise ValueError("") - schema_inferrer = SchemaInferrer() - - if record_limit is None: - record_limit = self.max_record_limit - else: - record_limit = min(record_limit, self.max_record_limit) - - slices = [] - log_messages = [] - state = {} # No support for incremental sync - catalog = _create_configure_catalog(stream) - for message_group in self._get_message_groups( - source.read(self.logger, config, catalog, state), - schema_inferrer, - record_limit, - ): - if isinstance(message_group, AirbyteLogMessage): - log_messages.append({"message": message_group.message}) - else: - slices.append(message_group) - - return StreamRead( - logs=log_messages, - slices=slices, - test_read_limit_reached=self._has_reached_limit(slices), - inferred_schema=schema_inferrer.get_stream_schema(stream) - ) - - def _get_message_groups( - self, messages: Iterator[AirbyteMessage], schema_inferrer: SchemaInferrer, limit: int - ) -> Iterable[Union[StreamReadPages, AirbyteLogMessage]]: - """ - Message groups are partitioned according to when request log messages are received. Subsequent response log messages - and record messages belong to the prior request log message and when we encounter another request, append the latest - message group, until records have been read. - - Messages received from the CDK read operation will always arrive in the following order: - {type: LOG, log: {message: "request: ..."}} - {type: LOG, log: {message: "response: ..."}} - ... 0 or more record messages - {type: RECORD, record: {data: ...}} - {type: RECORD, record: {data: ...}} - Repeats for each request/response made - - Note: The exception is that normal log messages can be received at any time which are not incorporated into grouping - """ - records_count = 0 - at_least_one_page_in_group = False - current_page_records = [] - current_slice_pages = [] - current_page_request: Optional[HttpRequest] = None - current_page_response: Optional[HttpResponse] = None - - while records_count < limit and (message := next(messages, None)): - if self._need_to_close_page(at_least_one_page_in_group, message): - self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records) - current_page_request = None - current_page_response = None - - if at_least_one_page_in_group and message.type == Type.LOG and message.log.message.startswith("slice:"): - yield StreamReadSlices(pages=current_slice_pages) - current_slice_pages = [] - at_least_one_page_in_group = False - elif message.type == Type.LOG and message.log.message.startswith("request:"): - if not at_least_one_page_in_group: - at_least_one_page_in_group = True - current_page_request = self._create_request_from_log_message(message.log) - elif message.type == Type.LOG and message.log.message.startswith("response:"): - current_page_response = self._create_response_from_log_message(message.log) - elif message.type == Type.LOG: - yield message.log - elif message.type == Type.RECORD: - current_page_records.append(message.record.data) - records_count += 1 - schema_inferrer.accumulate(message.record) - else: - self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records) - yield StreamReadSlices(pages=current_slice_pages) - - @staticmethod - def _need_to_close_page(at_least_one_page_in_group, message): - return ( - at_least_one_page_in_group - and message.type == Type.LOG - and (message.log.message.startswith("request:") or message.log.message.startswith("slice:")) - ) - - @staticmethod - def _close_page(current_page_request, current_page_response, current_slice_pages, current_page_records): - if not current_page_request or not current_page_response: - raise ValueError("Every message grouping should have at least one request and response") - - current_slice_pages.append( - StreamReadPages(request=current_page_request, response=current_page_response, records=deepcopy(current_page_records)) - ) - current_page_records.clear() - - def _create_request_from_log_message(self, log_message: AirbyteLogMessage) -> Optional[HttpRequest]: - # TODO: As a temporary stopgap, the CDK emits request data as a log message string. Ideally this should come in the - # form of a custom message object defined in the Airbyte protocol, but this unblocks us in the immediate while the - # protocol change is worked on. - raw_request = log_message.message.partition("request:")[2] - try: - request = json.loads(raw_request) - url = urlparse(request.get("url", "")) - full_path = f"{url.scheme}://{url.hostname}{url.path}" if url else "" - parameters = parse_qs(url.query) or None - return HttpRequest( - url=full_path, - http_method=request.get("http_method", ""), - headers=request.get("headers"), - parameters=parameters, - body=request.get("body"), - ) - except JSONDecodeError as error: - self.logger.warning(f"Failed to parse log message into request object with error: {error}") - return None - - def _create_response_from_log_message(self, log_message: AirbyteLogMessage) -> Optional[HttpResponse]: - # TODO: As a temporary stopgap, the CDK emits response data as a log message string. Ideally this should come in the - # form of a custom message object defined in the Airbyte protocol, but this unblocks us in the immediate while the - # protocol change is worked on. - raw_response = log_message.message.partition("response:")[2] - try: - response = json.loads(raw_response) - body = response.get("body", "{}") - return HttpResponse(status=response.get("status_code"), body=body, headers=response.get("headers")) - except JSONDecodeError as error: - self.logger.warning(f"Failed to parse log message into response object with error: {error}") - return None - - def _has_reached_limit(self, slices): - if len(slices) >= self._max_slices: - return True - - for slice in slices: - if len(slice.pages) >= self._max_pages_per_slice: - return True - return False - - def resolve_manifest(source) -> Union[AirbyteMessage, AirbyteRecordMessage]: try: return AirbyteRecordMessage( @@ -279,19 +42,3 @@ def resolve_manifest(source) -> Union[AirbyteMessage, AirbyteRecordMessage]: def _emitted_at(): return int(datetime.now().timestamp()) * 1000 -def _create_configure_catalog(stream_name: str) -> ConfiguredAirbyteCatalog: - return ConfiguredAirbyteCatalog.parse_obj( - { - "streams": [ - { - "stream": { - "name": stream_name, - "json_schema": {}, - "supported_sync_modes": ["full_refresh", "incremental"], - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite", - } - ] - } - ) diff --git a/airbyte-cdk/python/connector_builder/message_grouper.py b/airbyte-cdk/python/connector_builder/message_grouper.py index 726efe114c31..29fb223aa47a 100644 --- a/airbyte-cdk/python/connector_builder/message_grouper.py +++ b/airbyte-cdk/python/connector_builder/message_grouper.py @@ -2,9 +2,278 @@ from airbyte_cdk.models import AirbyteMessage from airbyte_cdk.utils.schema_inferrer import SchemaInferrer +from datetime import datetime +from typing import List + +from airbyte_cdk.models import AirbyteRecordMessage +from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource +from airbyte_cdk.utils.traced_exception import AirbyteTracedException + +from dataclasses import asdict, dataclass +from copy import deepcopy +import json +from json import JSONDecodeError +from typing import Any, Dict, Iterable, Iterator, Mapping, Optional, Union +from urllib.parse import parse_qs, urlparse + +from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Type +from airbyte_cdk.utils.schema_inferrer import SchemaInferrer +import logging +from airbyte_protocol.models.airbyte_protocol import ConfiguredAirbyteCatalog, ConfiguredAirbyteStream, SyncMode, DestinationSyncMode + + +@dataclass +class HttpResponse: + status: int + body: Optional[str] = None + headers: Optional[Dict[str, Any]] = None + +@dataclass +class HttpRequest: + url: str + parameters: Optional[Dict[str, Any]] + body: Optional[Dict[str, Any]] + headers: Optional[Dict[str, Any]] + http_method: str +@dataclass +class StreamReadPages: + records: List[object] + request: Optional[HttpRequest] = None + response: Optional[HttpResponse] = None + +@dataclass +class StreamReadSlicesInnerPagesInner: + + records: List[object] + request: Optional[HttpRequest] + response: Optional[HttpResponse] + +@dataclass +class StreamReadSlicesInnerSliceDescriptor: + start_datetime: Optional[datetime] + list_item: Optional[str] + +@dataclass +class StreamReadSlicesInner: + pages: List[StreamReadSlicesInnerPagesInner] + slice_descriptor: Optional[StreamReadSlicesInnerSliceDescriptor] + state: Optional[Dict[str, Any]] + +@dataclass +class StreamRead(object): + logs: List[object] + slices: List[StreamReadSlicesInner] + test_read_limit_reached: bool + inferred_schema: Optional[Dict[str, Any]] + +@dataclass +class StreamReadRequestBody: + manifest: Dict[str, Any] + stream: str + config: Dict[str, Any] + state: Optional[Dict[str, Any]] + record_limit: Optional[int] + + def __post_init__(self): + raise ValueError("here") + if not (1 <= self.record_limit <= 1000): + raise ValueError("") #FIXME + +#FIXME: can dataclasses also have validators? +""" + @validator("record_limit") + def record_limit_max(cls, value): + assert value <= 1000 + return value + + @validator("record_limit") + def record_limit_min(cls, value): + assert value >= 1 + return value +""" + +@dataclass +class StreamReadSliceDescriptor: + start_datetime: Optional[datetime] = None + list_item: Optional[str] = None + +@dataclass +class StreamReadSlices: + pages: List[StreamReadPages] + slice_descriptor: Optional[StreamReadSliceDescriptor] = None + state: Optional[Dict[str, Any]] = None class MessageGrouper: + logger = logging.getLogger("airbyte.connector-builder") + + def __init__(self, max_pages_per_slice: int, max_slices: int, max_record_limit: int = 1000): + self._max_pages_per_slice = max_pages_per_slice + self._max_slices = max_slices + self.max_record_limit = max_record_limit + + def get_grouped_messages(self, + source: DeclarativeSource, + config: Mapping[str, Any], + stream: str, + record_limit: Optional[int] = None, + ) -> StreamRead: + if record_limit is not None and not (1 <= record_limit <= 1000): + raise ValueError("") + schema_inferrer = SchemaInferrer() + + if record_limit is None: + record_limit = self.max_record_limit + else: + record_limit = min(record_limit, self.max_record_limit) + + slices = [] + log_messages = [] + state = {} # No support for incremental sync + catalog = MessageGrouper._create_configure_catalog(stream) + for message_group in self._get_message_groups( + source.read(self.logger, config, catalog, state), + schema_inferrer, + record_limit, + ): + if isinstance(message_group, AirbyteLogMessage): + log_messages.append({"message": message_group.message}) + else: + slices.append(message_group) + + return StreamRead( + logs=log_messages, + slices=slices, + test_read_limit_reached=self._has_reached_limit(slices), + inferred_schema=schema_inferrer.get_stream_schema(stream) + ) + + def _get_message_groups( + self, messages: Iterator[AirbyteMessage], schema_inferrer: SchemaInferrer, limit: int + ) -> Iterable[Union[StreamReadPages, AirbyteLogMessage]]: + """ + Message groups are partitioned according to when request log messages are received. Subsequent response log messages + and record messages belong to the prior request log message and when we encounter another request, append the latest + message group, until records have been read. + + Messages received from the CDK read operation will always arrive in the following order: + {type: LOG, log: {message: "request: ..."}} + {type: LOG, log: {message: "response: ..."}} + ... 0 or more record messages + {type: RECORD, record: {data: ...}} + {type: RECORD, record: {data: ...}} + Repeats for each request/response made + + Note: The exception is that normal log messages can be received at any time which are not incorporated into grouping + """ + records_count = 0 + at_least_one_page_in_group = False + current_page_records = [] + current_slice_pages = [] + current_page_request: Optional[HttpRequest] = None + current_page_response: Optional[HttpResponse] = None + + while records_count < limit and (message := next(messages, None)): + if self._need_to_close_page(at_least_one_page_in_group, message): + self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records) + current_page_request = None + current_page_response = None + + if at_least_one_page_in_group and message.type == Type.LOG and message.log.message.startswith("slice:"): + yield StreamReadSlices(pages=current_slice_pages) + current_slice_pages = [] + at_least_one_page_in_group = False + elif message.type == Type.LOG and message.log.message.startswith("request:"): + if not at_least_one_page_in_group: + at_least_one_page_in_group = True + current_page_request = self._create_request_from_log_message(message.log) + elif message.type == Type.LOG and message.log.message.startswith("response:"): + current_page_response = self._create_response_from_log_message(message.log) + elif message.type == Type.LOG: + yield message.log + elif message.type == Type.RECORD: + current_page_records.append(message.record.data) + records_count += 1 + schema_inferrer.accumulate(message.record) + else: + self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records) + yield StreamReadSlices(pages=current_slice_pages) + + @staticmethod + def _need_to_close_page(at_least_one_page_in_group, message): + return ( + at_least_one_page_in_group + and message.type == Type.LOG + and (message.log.message.startswith("request:") or message.log.message.startswith("slice:")) + ) + + @staticmethod + def _close_page(current_page_request, current_page_response, current_slice_pages, current_page_records): + if not current_page_request or not current_page_response: + raise ValueError("Every message grouping should have at least one request and response") + + current_slice_pages.append( + StreamReadPages(request=current_page_request, response=current_page_response, records=deepcopy(current_page_records)) + ) + current_page_records.clear() + + def _create_request_from_log_message(self, log_message: AirbyteLogMessage) -> Optional[HttpRequest]: + # TODO: As a temporary stopgap, the CDK emits request data as a log message string. Ideally this should come in the + # form of a custom message object defined in the Airbyte protocol, but this unblocks us in the immediate while the + # protocol change is worked on. + raw_request = log_message.message.partition("request:")[2] + try: + request = json.loads(raw_request) + url = urlparse(request.get("url", "")) + full_path = f"{url.scheme}://{url.hostname}{url.path}" if url else "" + parameters = parse_qs(url.query) or None + return HttpRequest( + url=full_path, + http_method=request.get("http_method", ""), + headers=request.get("headers"), + parameters=parameters, + body=request.get("body"), + ) + except JSONDecodeError as error: + self.logger.warning(f"Failed to parse log message into request object with error: {error}") + return None + + def _create_response_from_log_message(self, log_message: AirbyteLogMessage) -> Optional[HttpResponse]: + # TODO: As a temporary stopgap, the CDK emits response data as a log message string. Ideally this should come in the + # form of a custom message object defined in the Airbyte protocol, but this unblocks us in the immediate while the + # protocol change is worked on. + raw_response = log_message.message.partition("response:")[2] + try: + response = json.loads(raw_response) + body = response.get("body", "{}") + return HttpResponse(status=response.get("status_code"), body=body, headers=response.get("headers")) + except JSONDecodeError as error: + self.logger.warning(f"Failed to parse log message into response object with error: {error}") + return None + + def _has_reached_limit(self, slices): + if len(slices) >= self._max_slices: + return True + + for slice in slices: + if len(slice.pages) >= self._max_pages_per_slice: + return True + return False - def get_message_groups(self, messages: Iterator[AirbyteMessage], schema_inferrer: SchemaInferrer, limit: int) -> Iterable: #FIXME: set right return type - pass + @classmethod + def _create_configure_catalog(cls, stream_name: str) -> ConfiguredAirbyteCatalog: + return ConfiguredAirbyteCatalog.parse_obj( + { + "streams": [ + { + "stream": { + "name": stream_name, + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"], + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + } + ] + } + ) diff --git a/airbyte-cdk/python/source_declarative_manifest/main.py b/airbyte-cdk/python/source_declarative_manifest/main.py index edf85a7b25e8..ebd2304ab9e5 100644 --- a/airbyte-cdk/python/source_declarative_manifest/main.py +++ b/airbyte-cdk/python/source_declarative_manifest/main.py @@ -16,8 +16,7 @@ from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from connector_builder import connector_builder_handler -from airbyte_protocol.models.airbyte_protocol import ConfiguredAirbyteCatalog, ConfiguredAirbyteStream, SyncMode, DestinationSyncMode -import logging +from connector_builder.message_grouper import MessageGrouper from connector_builder.connector_builder_handler import _emitted_at @@ -60,8 +59,8 @@ def execute_command(source: DeclarativeSource, config: Mapping[str, Any]) -> Air max_pages_per_slice = command_config["max_pages_per_slice"] max_slices = command_config["max_slices"] max_record_limit = command_config["max_records"] - handler = connector_builder_handler.ConnectorBuilderHandler(max_pages_per_slice, max_slices) - stream_read = handler.read_stream(source, config, stream_name, max_record_limit) + handler = MessageGrouper(max_pages_per_slice, max_slices) + stream_read = handler.get_grouped_messages(source, config, stream_name, max_record_limit) return AirbyteMessage(type=MessageType.RECORD, record=AirbyteRecordMessage( data=dataclasses.asdict(stream_read), stream="_test_read", diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py b/airbyte-cdk/python/unit_tests/connector_builder/test_message_grouper.py similarity index 87% rename from airbyte-cdk/python/unit_tests/connector_builder/test_read.py rename to airbyte-cdk/python/unit_tests/connector_builder/test_message_grouper.py index 303e08d2ff2c..11b070bb7137 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_read.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_message_grouper.py @@ -7,7 +7,7 @@ import pytest from airbyte_cdk.models import Level -from connector_builder.connector_builder_handler import * +from connector_builder.message_grouper import * MAX_PAGES_PER_SLICE = 4 MAX_SLICES = 3 @@ -70,7 +70,7 @@ CONFIG = {"rank": "upper-six"} -def test_read_stream(): +def test_get_grouped_messages(): request = { "url": "https://demonslayers.com/api/v1/hashiras?era=taisho", "headers": {"Content-Type": "application/json"}, @@ -118,15 +118,15 @@ def test_read_stream(): ) ) - connector_builder_handler = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES) - actual_response: StreamRead = connector_builder_handler.read_stream(source=mock_source, config=CONFIG, stream="hashiras") + connector_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) + actual_response: StreamRead = connector_builder_handler.get_grouped_messages(source=mock_source, config=CONFIG, stream="hashiras") assert actual_response.inferred_schema == expected_schema single_slice = actual_response.slices[0] for i, actual_page in enumerate(single_slice.pages): assert actual_page == expected_pages[i] -def test_read_stream_with_logs(): +def test_get_grouped_messages_with_logs(): request = { "url": "https://demonslayers.com/api/v1/hashiras?era=taisho", "headers": {"Content-Type": "application/json"}, @@ -178,9 +178,9 @@ def test_read_stream_with_logs(): ) ) - connector_builder_handler = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES) + connector_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) - actual_response: AirbyteMessage = connector_builder_handler.read_stream(source=mock_source, config=CONFIG, stream="hashiras") + actual_response: AirbyteMessage = connector_builder_handler.get_grouped_messages(source=mock_source, config=CONFIG, stream="hashiras") single_slice = actual_response.slices[0] for i, actual_page in enumerate(single_slice.pages): assert actual_page == expected_pages[i] @@ -195,7 +195,7 @@ def test_read_stream_with_logs(): pytest.param(3, 1, id="test_create_request_record_limit_exceeds_max"), ], ) -def test_read_stream_record_limit(request_record_limit, max_record_limit): +def test_get_grouped_messages_record_limit(request_record_limit, max_record_limit): request = { "url": "https://demonslayers.com/api/v1/hashiras?era=taisho", "headers": {"Content-Type": "application/json"}, @@ -219,8 +219,8 @@ def test_read_stream_record_limit(request_record_limit, max_record_limit): n_records = 2 record_limit = min(request_record_limit, max_record_limit) - api = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES, max_record_limit=max_record_limit) - actual_response: StreamRead = api.read_stream(mock_source, config=CONFIG, stream="hashiras", record_limit=request_record_limit) + api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES, max_record_limit=max_record_limit) + actual_response: StreamRead = api.get_grouped_messages(mock_source, config=CONFIG, stream="hashiras", record_limit=request_record_limit) single_slice = actual_response.slices[0] total_records = 0 for i, actual_page in enumerate(single_slice.pages): @@ -234,7 +234,7 @@ def test_read_stream_record_limit(request_record_limit, max_record_limit): pytest.param(1, id="test_create_request_no_record_limit_n_records_exceed_max"), ], ) -def test_read_stream_default_record_limit(max_record_limit): +def test_get_grouped_messages_default_record_limit(max_record_limit): request = { "url": "https://demonslayers.com/api/v1/hashiras?era=taisho", "headers": {"Content-Type": "application/json"}, @@ -257,15 +257,15 @@ def test_read_stream_default_record_limit(max_record_limit): ) n_records = 2 - api = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES, max_record_limit=max_record_limit) - actual_response: StreamRead = api.read_stream(source=mock_source, config=CONFIG, stream="hashiras") + api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES, max_record_limit=max_record_limit) + actual_response: StreamRead = api.get_grouped_messages(source=mock_source, config=CONFIG, stream="hashiras") single_slice = actual_response.slices[0] total_records = 0 for i, actual_page in enumerate(single_slice.pages): total_records += len(actual_page.records) assert total_records == min([max_record_limit, n_records]) -def test_read_stream_limit_0(): +def test_get_grouped_messages_limit_0(): request = { "url": "https://demonslayers.com/api/v1/hashiras?era=taisho", "headers": {"Content-Type": "application/json"}, @@ -286,12 +286,12 @@ def test_read_stream_limit_0(): ] ) ) - api = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES) + api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) with pytest.raises(ValueError): - api.read_stream(source=mock_source, config=CONFIG, stream="hashiras", record_limit=0) + api.get_grouped_messages(source=mock_source, config=CONFIG, stream="hashiras", record_limit=0) -def test_read_stream_no_records(): +def test_get_grouped_messages_no_records(): request = { "url": "https://demonslayers.com/api/v1/hashiras?era=taisho", "headers": {"Content-Type": "application/json"}, @@ -335,15 +335,15 @@ def test_read_stream_no_records(): ) ) - api = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES) + api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) - actual_response: AirbyteMessage = api.read_stream(source=mock_source, config=CONFIG, stream="hashiras") + actual_response: AirbyteMessage = api.get_grouped_messages(source=mock_source, config=CONFIG, stream="hashiras") single_slice = actual_response.slices[0] for i, actual_page in enumerate(single_slice.pages): assert actual_page == expected_pages[i] -def test_read_stream_invalid_group_format(): +def test_get_grouped_messages_invalid_group_format(): response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'} mock_source = make_mock_source( @@ -356,10 +356,10 @@ def test_read_stream_invalid_group_format(): ) ) - api = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES) + api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) with pytest.raises(ValueError) as actual_exception: - api.read_stream(source=mock_source, config=CONFIG, stream="hashiras") + api.get_grouped_messages(source=mock_source, config=CONFIG, stream="hashiras") @pytest.mark.parametrize( "log_message, expected_response", @@ -408,12 +408,12 @@ def test_create_response_from_log_message(log_message, expected_response): response_message = f"response:{json.dumps(log_message)}" airbyte_log_message = AirbyteLogMessage(level=Level.INFO, message=response_message) - connector_builder_handler = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES) + connector_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) actual_response = connector_builder_handler._create_response_from_log_message(airbyte_log_message) assert actual_response == expected_response -def test_read_stream_with_many_slices(): +def test_get_grouped_messages_with_many_slices(): request = {} response = {"status_code": 200} @@ -438,9 +438,9 @@ def test_read_stream_with_many_slices(): ) ) - connecto_builder_handler = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES) + connecto_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) - stream_read: StreamRead = connecto_builder_handler.read_stream(source=mock_source, config=CONFIG, stream="hashiras") + stream_read: StreamRead = connecto_builder_handler.get_grouped_messages(source=mock_source, config=CONFIG, stream="hashiras") assert not stream_read.test_read_limit_reached assert len(stream_read.slices) == 2 @@ -453,7 +453,7 @@ def test_read_stream_with_many_slices(): assert len(stream_read.slices[1].pages[1].records) == 1 assert len(stream_read.slices[1].pages[2].records) == 0 -def test_read_stream_given_maximum_number_of_slices_then_test_read_limit_reached(): +def test_get_grouped_messages_given_maximum_number_of_slices_then_test_read_limit_reached(): maximum_number_of_slices = 5 request = {} response = {"status_code": 200} @@ -461,13 +461,13 @@ def test_read_stream_given_maximum_number_of_slices_then_test_read_limit_reached iter([slice_message(), request_log_message(request), response_log_message(response)] * maximum_number_of_slices) ) - api = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES) + api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) - stream_read: StreamRead = api.read_stream(source=mock_source, config=CONFIG, stream="hashiras") + stream_read: StreamRead = api.get_grouped_messages(source=mock_source, config=CONFIG, stream="hashiras") assert stream_read.test_read_limit_reached -def test_read_stream_given_maximum_number_of_pages_then_test_read_limit_reached(): +def test_get_grouped_messages_given_maximum_number_of_pages_then_test_read_limit_reached(): maximum_number_of_pages_per_slice = 5 request = {} response = {"status_code": 200} @@ -475,9 +475,9 @@ def test_read_stream_given_maximum_number_of_pages_then_test_read_limit_reached( iter([slice_message()] + [request_log_message(request), response_log_message(response)] * maximum_number_of_pages_per_slice) ) - api = ConnectorBuilderHandler(MAX_PAGES_PER_SLICE, MAX_SLICES) + api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) - stream_read: StreamRead = api.read_stream(source=mock_source, config=CONFIG, stream="hashiras") + stream_read: StreamRead = api.get_grouped_messages(source=mock_source, config=CONFIG, stream="hashiras") assert stream_read.test_read_limit_reached From 2f048e91dcce6e06aa4969c49197e406d8f91cf7 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Thu, 9 Mar 2023 17:47:08 -0800 Subject: [PATCH 25/71] Update --- .../connector_builder_handler.py | 54 +++++++++++++------ .../source_declarative_manifest/main.py | 25 ++------- 2 files changed, 43 insertions(+), 36 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index 3e88494313fe..be5de221d3f3 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -2,30 +2,37 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +import dataclasses from datetime import datetime -from typing import List +from typing import Any, Mapping +from typing import Union -from airbyte_cdk.models import AirbyteRecordMessage +from airbyte_protocol.models.airbyte_protocol import ConfiguredAirbyteCatalog + +from airbyte_cdk.models import AirbyteRecordMessage, AirbyteMessage +from airbyte_cdk.models import Type as MessageType from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource from airbyte_cdk.utils.traced_exception import AirbyteTracedException - -from dataclasses import asdict, dataclass -from copy import deepcopy -import json -from json import JSONDecodeError -from typing import Any, Dict, Iterable, Iterator, Mapping, Optional, Union -from urllib.parse import parse_qs, urlparse - -from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Type -from airbyte_cdk.utils.schema_inferrer import SchemaInferrer -import logging -from airbyte_protocol.models.airbyte_protocol import ConfiguredAirbyteCatalog, ConfiguredAirbyteStream, SyncMode, DestinationSyncMode - +from connector_builder.message_grouper import MessageGrouper def list_streams() -> AirbyteRecordMessage: raise NotImplementedError +def read_stream(source: DeclarativeSource, config: Mapping[str, Any]): + command_config = config.get("__command_config") + stream_name = command_config["stream_name"] + max_pages_per_slice = command_config["max_pages_per_slice"] + max_slices = command_config["max_slices"] + max_records = command_config["max_records"] + handler = MessageGrouper(max_pages_per_slice, max_slices) + stream_read = handler.get_grouped_messages(source, config, stream_name, max_records) + return AirbyteMessage(type=MessageType.RECORD, record=AirbyteRecordMessage( + data=dataclasses.asdict(stream_read), + stream="_test_read", + emitted_at=_emitted_at() + )) + def resolve_manifest(source) -> Union[AirbyteMessage, AirbyteRecordMessage]: try: @@ -42,3 +49,20 @@ def resolve_manifest(source) -> Union[AirbyteMessage, AirbyteRecordMessage]: def _emitted_at(): return int(datetime.now().timestamp()) * 1000 +def _create_configure_catalog(stream_name: str) -> ConfiguredAirbyteCatalog: + return ConfiguredAirbyteCatalog.parse_obj( + { + "streams": [ + { + "stream": { + "name": stream_name, + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"], + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + } + ] + } + ) + diff --git a/airbyte-cdk/python/source_declarative_manifest/main.py b/airbyte-cdk/python/source_declarative_manifest/main.py index ebd2304ab9e5..0f41f6826b53 100644 --- a/airbyte-cdk/python/source_declarative_manifest/main.py +++ b/airbyte-cdk/python/source_declarative_manifest/main.py @@ -4,24 +4,18 @@ import argparse -import dataclasses import sys from typing import Any, List, Mapping, Tuple -from airbyte_cdk.models import AirbyteRecordMessage, AirbyteMessage, Level, SyncMode - -from airbyte_cdk.models import Type as MessageType from airbyte_cdk.connector import BaseConnector from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch +from airbyte_cdk.models import AirbyteMessage from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from connector_builder import connector_builder_handler -from connector_builder.message_grouper import MessageGrouper - -from connector_builder.connector_builder_handler import _emitted_at -def create_source(config: Mapping[str, Any], debug) -> DeclarativeSource: +def create_source(config: Mapping[str, Any], debug: bool) -> DeclarativeSource: manifest = config.get("__injected_declarative_manifest") return ManifestDeclarativeSource(manifest, debug) @@ -41,7 +35,7 @@ def get_config_from_args(args: List[str]) -> Mapping[str, Any]: return config -def preparse(args: List[str]) -> Tuple[str, str, str]: +def preparse(args: List[str]) -> Tuple[str, str]: parser = argparse.ArgumentParser() parser.add_argument("command", type=str, help="Airbyte Protocol command") parser.add_argument("--config", type=str, required=True, help="path to the json configuration file") @@ -51,21 +45,10 @@ def preparse(args: List[str]) -> Tuple[str, str, str]: def execute_command(source: DeclarativeSource, config: Mapping[str, Any]) -> AirbyteMessage: command = config.get("__command") - command_config = config.get("__command_config") if command == "resolve_manifest": return connector_builder_handler.resolve_manifest(source) elif command == "read": - stream_name = command_config["stream_name"] - max_pages_per_slice = command_config["max_pages_per_slice"] - max_slices = command_config["max_slices"] - max_record_limit = command_config["max_records"] - handler = MessageGrouper(max_pages_per_slice, max_slices) - stream_read = handler.get_grouped_messages(source, config, stream_name, max_record_limit) - return AirbyteMessage(type=MessageType.RECORD, record=AirbyteRecordMessage( - data=dataclasses.asdict(stream_read), - stream="_test_read", - emitted_at=_emitted_at() #FIXME need to move to connector_builder_handler - )) + return connector_builder_handler.read_stream(source, config) raise ValueError(f"Unrecognized command {command}.") def handle_connector_builder_request(source: DeclarativeSource, config: Mapping[str, Any]): From dd778ed3ac44ef5b1fb4c8dd03ce4fd2c33a6333 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Thu, 9 Mar 2023 17:53:36 -0800 Subject: [PATCH 26/71] Update --- .../connector_builder/test_connector_builder_handler.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py index 26e652df0faf..84b8795fab65 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py @@ -1,9 +1,11 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +from typing import Iterator +from unittest.mock import MagicMock from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource -from connector_builder.connector_builder_handler import resolve_manifest +from connector_builder.connector_builder_handler import read_stream, resolve_manifest _stream_name = "stream_with_custom_requester" _stream_primary_key = "id" From e79adf8c4db8088664f94c5142e99f746b4ab537 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Thu, 9 Mar 2023 17:57:26 -0800 Subject: [PATCH 27/71] Update --- .../connector_builder_handler.py | 3 +- .../connector_builder/message_grouper.py | 105 ++---------------- .../test_connector_builder_handler.py | 4 +- 3 files changed, 12 insertions(+), 100 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index be5de221d3f3..cffb340af643 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -19,6 +19,7 @@ def list_streams() -> AirbyteRecordMessage: raise NotImplementedError + def read_stream(source: DeclarativeSource, config: Mapping[str, Any]): command_config = config.get("__command_config") stream_name = command_config["stream_name"] @@ -49,6 +50,7 @@ def resolve_manifest(source) -> Union[AirbyteMessage, AirbyteRecordMessage]: def _emitted_at(): return int(datetime.now().timestamp()) * 1000 + def _create_configure_catalog(stream_name: str) -> ConfiguredAirbyteCatalog: return ConfiguredAirbyteCatalog.parse_obj( { @@ -65,4 +67,3 @@ def _create_configure_catalog(stream_name: str) -> ConfiguredAirbyteCatalog: ] } ) - diff --git a/airbyte-cdk/python/connector_builder/message_grouper.py b/airbyte-cdk/python/connector_builder/message_grouper.py index 29fb223aa47a..cbbf392a0e62 100644 --- a/airbyte-cdk/python/connector_builder/message_grouper.py +++ b/airbyte-cdk/python/connector_builder/message_grouper.py @@ -1,107 +1,20 @@ -from typing import Iterable, Iterator +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# -from airbyte_cdk.models import AirbyteMessage -from airbyte_cdk.utils.schema_inferrer import SchemaInferrer -from datetime import datetime -from typing import List - -from airbyte_cdk.models import AirbyteRecordMessage -from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource -from airbyte_cdk.utils.traced_exception import AirbyteTracedException - -from dataclasses import asdict, dataclass -from copy import deepcopy import json from json import JSONDecodeError -from typing import Any, Dict, Iterable, Iterator, Mapping, Optional, Union +from typing import Any, Iterable, Iterator, Mapping, Optional, Union from urllib.parse import parse_qs, urlparse +from airbyte_protocol.models.airbyte_protocol import ConfiguredAirbyteCatalog + from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Type +from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource from airbyte_cdk.utils.schema_inferrer import SchemaInferrer +from connector_builder.models import StreamRead, StreamReadPages, HttpResponse, HttpRequest, StreamReadSlices import logging -from airbyte_protocol.models.airbyte_protocol import ConfiguredAirbyteCatalog, ConfiguredAirbyteStream, SyncMode, DestinationSyncMode - - -@dataclass -class HttpResponse: - status: int - body: Optional[str] = None - headers: Optional[Dict[str, Any]] = None - -@dataclass -class HttpRequest: - url: str - parameters: Optional[Dict[str, Any]] - body: Optional[Dict[str, Any]] - headers: Optional[Dict[str, Any]] - http_method: str -@dataclass -class StreamReadPages: - records: List[object] - request: Optional[HttpRequest] = None - response: Optional[HttpResponse] = None - -@dataclass -class StreamReadSlicesInnerPagesInner: - - records: List[object] - request: Optional[HttpRequest] - response: Optional[HttpResponse] - -@dataclass -class StreamReadSlicesInnerSliceDescriptor: - start_datetime: Optional[datetime] - list_item: Optional[str] - -@dataclass -class StreamReadSlicesInner: - pages: List[StreamReadSlicesInnerPagesInner] - slice_descriptor: Optional[StreamReadSlicesInnerSliceDescriptor] - state: Optional[Dict[str, Any]] - -@dataclass -class StreamRead(object): - logs: List[object] - slices: List[StreamReadSlicesInner] - test_read_limit_reached: bool - inferred_schema: Optional[Dict[str, Any]] - -@dataclass -class StreamReadRequestBody: - manifest: Dict[str, Any] - stream: str - config: Dict[str, Any] - state: Optional[Dict[str, Any]] - record_limit: Optional[int] - - def __post_init__(self): - raise ValueError("here") - if not (1 <= self.record_limit <= 1000): - raise ValueError("") #FIXME - -#FIXME: can dataclasses also have validators? -""" - @validator("record_limit") - def record_limit_max(cls, value): - assert value <= 1000 - return value - - @validator("record_limit") - def record_limit_min(cls, value): - assert value >= 1 - return value -""" - -@dataclass -class StreamReadSliceDescriptor: - start_datetime: Optional[datetime] = None - list_item: Optional[str] = None - -@dataclass -class StreamReadSlices: - pages: List[StreamReadPages] - slice_descriptor: Optional[StreamReadSliceDescriptor] = None - state: Optional[Dict[str, Any]] = None +from copy import deepcopy class MessageGrouper: diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py index 84b8795fab65..26e652df0faf 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py @@ -1,11 +1,9 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -from typing import Iterator -from unittest.mock import MagicMock from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource -from connector_builder.connector_builder_handler import read_stream, resolve_manifest +from connector_builder.connector_builder_handler import resolve_manifest _stream_name = "stream_with_custom_requester" _stream_primary_key = "id" From ab009a616b3b61fef8d948464f3bf26ed7bee1a7 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Thu, 9 Mar 2023 17:58:15 -0800 Subject: [PATCH 28/71] update --- .../connector_builder_handler.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index cffb340af643..ec31367e15b5 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -49,21 +49,3 @@ def resolve_manifest(source) -> Union[AirbyteMessage, AirbyteRecordMessage]: def _emitted_at(): return int(datetime.now().timestamp()) * 1000 - - -def _create_configure_catalog(stream_name: str) -> ConfiguredAirbyteCatalog: - return ConfiguredAirbyteCatalog.parse_obj( - { - "streams": [ - { - "stream": { - "name": stream_name, - "json_schema": {}, - "supported_sync_modes": ["full_refresh", "incremental"], - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite", - } - ] - } - ) From 71f94c1f4bb15142240d7e0436cbe0745b653426 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Thu, 9 Mar 2023 18:01:00 -0800 Subject: [PATCH 29/71] error message --- airbyte-cdk/python/connector_builder/message_grouper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-cdk/python/connector_builder/message_grouper.py b/airbyte-cdk/python/connector_builder/message_grouper.py index cbbf392a0e62..18ce725729e8 100644 --- a/airbyte-cdk/python/connector_builder/message_grouper.py +++ b/airbyte-cdk/python/connector_builder/message_grouper.py @@ -32,7 +32,7 @@ def get_grouped_messages(self, record_limit: Optional[int] = None, ) -> StreamRead: if record_limit is not None and not (1 <= record_limit <= 1000): - raise ValueError("") + raise ValueError(f"Record limit must be between 1 and 1000. Got {record_limit}") schema_inferrer = SchemaInferrer() if record_limit is None: From c4d8b84beefde85393c68f95ffbc796b0507775b Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Thu, 9 Mar 2023 18:02:43 -0800 Subject: [PATCH 30/71] rename --- .../connector_builder_handler.py | 2 +- .../connector_builder/message_grouper.py | 12 +-- .../python/connector_builder/models.py | 73 +++++++++++++++++++ .../connector_builder/test_message_grouper.py | 20 ++--- 4 files changed, 90 insertions(+), 17 deletions(-) create mode 100644 airbyte-cdk/python/connector_builder/models.py diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index ec31367e15b5..d35a487068ad 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -27,7 +27,7 @@ def read_stream(source: DeclarativeSource, config: Mapping[str, Any]): max_slices = command_config["max_slices"] max_records = command_config["max_records"] handler = MessageGrouper(max_pages_per_slice, max_slices) - stream_read = handler.get_grouped_messages(source, config, stream_name, max_records) + stream_read = handler.get_message_groups(source, config, stream_name, max_records) return AirbyteMessage(type=MessageType.RECORD, record=AirbyteRecordMessage( data=dataclasses.asdict(stream_read), stream="_test_read", diff --git a/airbyte-cdk/python/connector_builder/message_grouper.py b/airbyte-cdk/python/connector_builder/message_grouper.py index 18ce725729e8..c9ba17082263 100644 --- a/airbyte-cdk/python/connector_builder/message_grouper.py +++ b/airbyte-cdk/python/connector_builder/message_grouper.py @@ -25,12 +25,12 @@ def __init__(self, max_pages_per_slice: int, max_slices: int, max_record_limit: self._max_slices = max_slices self.max_record_limit = max_record_limit - def get_grouped_messages(self, - source: DeclarativeSource, - config: Mapping[str, Any], - stream: str, - record_limit: Optional[int] = None, - ) -> StreamRead: + def get_message_groups(self, + source: DeclarativeSource, + config: Mapping[str, Any], + stream: str, + record_limit: Optional[int] = None, + ) -> StreamRead: if record_limit is not None and not (1 <= record_limit <= 1000): raise ValueError(f"Record limit must be between 1 and 1000. Got {record_limit}") schema_inferrer = SchemaInferrer() diff --git a/airbyte-cdk/python/connector_builder/models.py b/airbyte-cdk/python/connector_builder/models.py new file mode 100644 index 000000000000..9a31c7f8e9b2 --- /dev/null +++ b/airbyte-cdk/python/connector_builder/models.py @@ -0,0 +1,73 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +from dataclasses import dataclass +from datetime import datetime +from typing import Any, Dict, Optional +from typing import List + + +@dataclass +class HttpResponse: + status: int + body: Optional[str] = None + headers: Optional[Dict[str, Any]] = None + +@dataclass +class HttpRequest: + url: str + parameters: Optional[Dict[str, Any]] + body: Optional[Dict[str, Any]] + headers: Optional[Dict[str, Any]] + http_method: str +@dataclass +class StreamReadPages: + records: List[object] + request: Optional[HttpRequest] = None + response: Optional[HttpResponse] = None + +@dataclass +class StreamReadSlicesInnerPagesInner: + + records: List[object] + request: Optional[HttpRequest] + response: Optional[HttpResponse] + +@dataclass +class StreamReadSlicesInnerSliceDescriptor: + start_datetime: Optional[datetime] + list_item: Optional[str] + +@dataclass +class StreamReadSlicesInner: + pages: List[StreamReadSlicesInnerPagesInner] + slice_descriptor: Optional[StreamReadSlicesInnerSliceDescriptor] + state: Optional[Dict[str, Any]] + +@dataclass +class StreamRead(object): + logs: List[object] + slices: List[StreamReadSlicesInner] + test_read_limit_reached: bool + inferred_schema: Optional[Dict[str, Any]] + +@dataclass +class StreamReadRequestBody: + manifest: Dict[str, Any] + stream: str + config: Dict[str, Any] + state: Optional[Dict[str, Any]] + record_limit: Optional[int] + +@dataclass +class StreamReadSliceDescriptor: + start_datetime: Optional[datetime] = None + list_item: Optional[str] = None + +@dataclass +class StreamReadSlices: + pages: List[StreamReadPages] + slice_descriptor: Optional[StreamReadSliceDescriptor] = None + state: Optional[Dict[str, Any]] = None + diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_message_grouper.py b/airbyte-cdk/python/unit_tests/connector_builder/test_message_grouper.py index 11b070bb7137..33643d0b40cd 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_message_grouper.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_message_grouper.py @@ -119,7 +119,7 @@ def test_get_grouped_messages(): ) connector_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) - actual_response: StreamRead = connector_builder_handler.get_grouped_messages(source=mock_source, config=CONFIG, stream="hashiras") + actual_response: StreamRead = connector_builder_handler.get_message_groups(source=mock_source, config=CONFIG, stream="hashiras") assert actual_response.inferred_schema == expected_schema single_slice = actual_response.slices[0] @@ -180,7 +180,7 @@ def test_get_grouped_messages_with_logs(): connector_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) - actual_response: AirbyteMessage = connector_builder_handler.get_grouped_messages(source=mock_source, config=CONFIG, stream="hashiras") + actual_response: AirbyteMessage = connector_builder_handler.get_message_groups(source=mock_source, config=CONFIG, stream="hashiras") single_slice = actual_response.slices[0] for i, actual_page in enumerate(single_slice.pages): assert actual_page == expected_pages[i] @@ -220,7 +220,7 @@ def test_get_grouped_messages_record_limit(request_record_limit, max_record_limi record_limit = min(request_record_limit, max_record_limit) api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES, max_record_limit=max_record_limit) - actual_response: StreamRead = api.get_grouped_messages(mock_source, config=CONFIG, stream="hashiras", record_limit=request_record_limit) + actual_response: StreamRead = api.get_message_groups(mock_source, config=CONFIG, stream="hashiras", record_limit=request_record_limit) single_slice = actual_response.slices[0] total_records = 0 for i, actual_page in enumerate(single_slice.pages): @@ -258,7 +258,7 @@ def test_get_grouped_messages_default_record_limit(max_record_limit): n_records = 2 api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES, max_record_limit=max_record_limit) - actual_response: StreamRead = api.get_grouped_messages(source=mock_source, config=CONFIG, stream="hashiras") + actual_response: StreamRead = api.get_message_groups(source=mock_source, config=CONFIG, stream="hashiras") single_slice = actual_response.slices[0] total_records = 0 for i, actual_page in enumerate(single_slice.pages): @@ -289,7 +289,7 @@ def test_get_grouped_messages_limit_0(): api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) with pytest.raises(ValueError): - api.get_grouped_messages(source=mock_source, config=CONFIG, stream="hashiras", record_limit=0) + api.get_message_groups(source=mock_source, config=CONFIG, stream="hashiras", record_limit=0) def test_get_grouped_messages_no_records(): request = { @@ -337,7 +337,7 @@ def test_get_grouped_messages_no_records(): api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) - actual_response: AirbyteMessage = api.get_grouped_messages(source=mock_source, config=CONFIG, stream="hashiras") + actual_response: AirbyteMessage = api.get_message_groups(source=mock_source, config=CONFIG, stream="hashiras") single_slice = actual_response.slices[0] for i, actual_page in enumerate(single_slice.pages): @@ -359,7 +359,7 @@ def test_get_grouped_messages_invalid_group_format(): api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) with pytest.raises(ValueError) as actual_exception: - api.get_grouped_messages(source=mock_source, config=CONFIG, stream="hashiras") + api.get_message_groups(source=mock_source, config=CONFIG, stream="hashiras") @pytest.mark.parametrize( "log_message, expected_response", @@ -440,7 +440,7 @@ def test_get_grouped_messages_with_many_slices(): connecto_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) - stream_read: StreamRead = connecto_builder_handler.get_grouped_messages(source=mock_source, config=CONFIG, stream="hashiras") + stream_read: StreamRead = connecto_builder_handler.get_message_groups(source=mock_source, config=CONFIG, stream="hashiras") assert not stream_read.test_read_limit_reached assert len(stream_read.slices) == 2 @@ -463,7 +463,7 @@ def test_get_grouped_messages_given_maximum_number_of_slices_then_test_read_limi api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) - stream_read: StreamRead = api.get_grouped_messages(source=mock_source, config=CONFIG, stream="hashiras") + stream_read: StreamRead = api.get_message_groups(source=mock_source, config=CONFIG, stream="hashiras") assert stream_read.test_read_limit_reached @@ -477,7 +477,7 @@ def test_get_grouped_messages_given_maximum_number_of_pages_then_test_read_limit api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) - stream_read: StreamRead = api.get_grouped_messages(source=mock_source, config=CONFIG, stream="hashiras") + stream_read: StreamRead = api.get_message_groups(source=mock_source, config=CONFIG, stream="hashiras") assert stream_read.test_read_limit_reached From 4c26009b413ccab6f836c5f77e8d9ae2bd96e088 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Thu, 9 Mar 2023 18:06:15 -0800 Subject: [PATCH 31/71] update --- .../connector_builder/connector_builder_handler.py | 10 +++++----- airbyte-cdk/python/source_declarative_manifest/main.py | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index d35a487068ad..127b249fe81f 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -16,11 +16,11 @@ from connector_builder.message_grouper import MessageGrouper -def list_streams() -> AirbyteRecordMessage: +def list_streams() -> AirbyteMessage: raise NotImplementedError -def read_stream(source: DeclarativeSource, config: Mapping[str, Any]): +def read_stream(source: DeclarativeSource, config: Mapping[str, Any]) -> AirbyteMessage: command_config = config.get("__command_config") stream_name = command_config["stream_name"] max_pages_per_slice = command_config["max_pages_per_slice"] @@ -35,13 +35,13 @@ def read_stream(source: DeclarativeSource, config: Mapping[str, Any]): )) -def resolve_manifest(source) -> Union[AirbyteMessage, AirbyteRecordMessage]: +def resolve_manifest(source) -> AirbyteMessage: try: - return AirbyteRecordMessage( + return AirbyteMessage(type=MessageType.RECORD, record=AirbyteRecordMessage( data={"manifest": source.resolved_manifest}, emitted_at=_emitted_at(), stream="", - ) + )) except Exception as exc: error = AirbyteTracedException.from_exception(exc, message="Error resolving manifest.") return error.as_airbyte_message() diff --git a/airbyte-cdk/python/source_declarative_manifest/main.py b/airbyte-cdk/python/source_declarative_manifest/main.py index 0f41f6826b53..286a84ba0588 100644 --- a/airbyte-cdk/python/source_declarative_manifest/main.py +++ b/airbyte-cdk/python/source_declarative_manifest/main.py @@ -51,6 +51,7 @@ def execute_command(source: DeclarativeSource, config: Mapping[str, Any]) -> Air return connector_builder_handler.read_stream(source, config) raise ValueError(f"Unrecognized command {command}.") + def handle_connector_builder_request(source: DeclarativeSource, config: Mapping[str, Any]): message = execute_command(source, config) print(message.json(exclude_unset=True)) From 31425f02f845d9b9eccecf50c3928da1d53ae1de Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Thu, 9 Mar 2023 18:20:27 -0800 Subject: [PATCH 32/71] Update --- .../connector_builder_handler.py | 3 -- .../test_connector_builder_handler.py | 2 +- .../connector_builder/test_message_grouper.py | 45 ++++++++++++------- 3 files changed, 31 insertions(+), 19 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index 127b249fe81f..9569ab03318a 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -5,9 +5,6 @@ import dataclasses from datetime import datetime from typing import Any, Mapping -from typing import Union - -from airbyte_protocol.models.airbyte_protocol import ConfiguredAirbyteCatalog from airbyte_cdk.models import AirbyteRecordMessage, AirbyteMessage from airbyte_cdk.models import Type as MessageType diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py index 26e652df0faf..431a253c5420 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py @@ -164,7 +164,7 @@ def test_resolve_manifest(): ], "check": {"type": "CheckStream", "stream_names": ["lists"]}, } - assert resolved_manifest.data["manifest"] == expected_resolved_manifest + assert resolved_manifest.record.data["manifest"] == expected_resolved_manifest def test_resolve_manifest_error_returns_error_response(): diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_message_grouper.py b/airbyte-cdk/python/unit_tests/connector_builder/test_message_grouper.py index 33643d0b40cd..8a7989efafc3 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_message_grouper.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_message_grouper.py @@ -1,13 +1,16 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # - +from typing import Iterator from unittest.mock import MagicMock import pytest -from airbyte_cdk.models import Level -from connector_builder.message_grouper import * +from airbyte_cdk.models import Level, AirbyteMessage, AirbyteRecordMessage, AirbyteLogMessage +from connector_builder.message_grouper import MessageGrouper +from connector_builder.models import StreamReadPages, HttpRequest, HttpResponse, StreamRead +from airbyte_cdk.models import Type as MessageType +import json MAX_PAGES_PER_SLICE = 4 MAX_SLICES = 3 @@ -70,6 +73,7 @@ CONFIG = {"rank": "upper-six"} + def test_get_grouped_messages(): request = { "url": "https://demonslayers.com/api/v1/hashiras?era=taisho", @@ -126,6 +130,7 @@ def test_get_grouped_messages(): for i, actual_page in enumerate(single_slice.pages): assert actual_page == expected_pages[i] + def test_get_grouped_messages_with_logs(): request = { "url": "https://demonslayers.com/api/v1/hashiras?era=taisho", @@ -167,20 +172,20 @@ def test_get_grouped_messages_with_logs(): mock_source = make_mock_source( iter( [ - AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message="log message before the request")), + AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=Level.INFO, message="log message before the request")), request_log_message(request), response_log_message(response), record_message("hashiras", {"name": "Shinobu Kocho"}), - AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message="log message during the page")), + AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=Level.INFO, message="log message during the page")), record_message("hashiras", {"name": "Muichiro Tokito"}), - AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message="log message after the response")), + AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=Level.INFO, message="log message after the response")), ] ) ) connector_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) - actual_response: AirbyteMessage = connector_builder_handler.get_message_groups(source=mock_source, config=CONFIG, stream="hashiras") + actual_response: StreamRead = connector_builder_handler.get_message_groups(source=mock_source, config=CONFIG, stream="hashiras") single_slice = actual_response.slices[0] for i, actual_page in enumerate(single_slice.pages): assert actual_page == expected_pages[i] @@ -188,6 +193,7 @@ def test_get_grouped_messages_with_logs(): for i, actual_log in enumerate(actual_response.logs): assert actual_log == expected_logs[i] + @pytest.mark.parametrize( "request_record_limit, max_record_limit", [ @@ -220,13 +226,14 @@ def test_get_grouped_messages_record_limit(request_record_limit, max_record_limi record_limit = min(request_record_limit, max_record_limit) api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES, max_record_limit=max_record_limit) - actual_response: StreamRead = api.get_message_groups(mock_source, config=CONFIG, stream="hashiras", record_limit=request_record_limit) + actual_response: StreamRead = api.get_message_groups(mock_source, config=CONFIG, stream="hashiras", record_limit=request_record_limit) single_slice = actual_response.slices[0] total_records = 0 for i, actual_page in enumerate(single_slice.pages): total_records += len(actual_page.records) assert total_records == min([record_limit, n_records]) + @pytest.mark.parametrize( "max_record_limit", [ @@ -265,6 +272,7 @@ def test_get_grouped_messages_default_record_limit(max_record_limit): total_records += len(actual_page.records) assert total_records == min([max_record_limit, n_records]) + def test_get_grouped_messages_limit_0(): request = { "url": "https://demonslayers.com/api/v1/hashiras?era=taisho", @@ -291,6 +299,7 @@ def test_get_grouped_messages_limit_0(): with pytest.raises(ValueError): api.get_message_groups(source=mock_source, config=CONFIG, stream="hashiras", record_limit=0) + def test_get_grouped_messages_no_records(): request = { "url": "https://demonslayers.com/api/v1/hashiras?era=taisho", @@ -335,14 +344,15 @@ def test_get_grouped_messages_no_records(): ) ) - api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) + message_grouper = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) - actual_response: AirbyteMessage = api.get_message_groups(source=mock_source, config=CONFIG, stream="hashiras") + actual_response: StreamRead = message_grouper.get_message_groups(source=mock_source, config=CONFIG, stream="hashiras") single_slice = actual_response.slices[0] for i, actual_page in enumerate(single_slice.pages): assert actual_page == expected_pages[i] + def test_get_grouped_messages_invalid_group_format(): response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'} @@ -361,6 +371,7 @@ def test_get_grouped_messages_invalid_group_format(): with pytest.raises(ValueError) as actual_exception: api.get_message_groups(source=mock_source, config=CONFIG, stream="hashiras") + @pytest.mark.parametrize( "log_message, expected_response", [ @@ -413,6 +424,7 @@ def test_create_response_from_log_message(log_message, expected_response): assert actual_response == expected_response + def test_get_grouped_messages_with_many_slices(): request = {} response = {"status_code": 200} @@ -453,6 +465,7 @@ def test_get_grouped_messages_with_many_slices(): assert len(stream_read.slices[1].pages[1].records) == 1 assert len(stream_read.slices[1].pages[2].records) == 0 + def test_get_grouped_messages_given_maximum_number_of_slices_then_test_read_limit_reached(): maximum_number_of_slices = 5 request = {} @@ -467,6 +480,7 @@ def test_get_grouped_messages_given_maximum_number_of_slices_then_test_read_limi assert stream_read.test_read_limit_reached + def test_get_grouped_messages_given_maximum_number_of_pages_then_test_read_limit_reached(): maximum_number_of_pages_per_slice = 5 request = {} @@ -481,23 +495,24 @@ def test_get_grouped_messages_given_maximum_number_of_pages_then_test_read_limit assert stream_read.test_read_limit_reached + def make_mock_source(return_value: Iterator) -> MagicMock: mock_source = MagicMock() mock_source.read.return_value = return_value return mock_source + def request_log_message(request: dict) -> AirbyteMessage: - return AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message=f"request:{json.dumps(request)}")) + return AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=Level.INFO, message=f"request:{json.dumps(request)}")) def response_log_message(response: dict) -> AirbyteMessage: - return AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message=f"response:{json.dumps(response)}")) + return AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=Level.INFO, message=f"response:{json.dumps(response)}")) def record_message(stream: str, data: dict) -> AirbyteMessage: - return AirbyteMessage(type=Type.RECORD, record=AirbyteRecordMessage(stream=stream, data=data, emitted_at=1234)) + return AirbyteMessage(type=MessageType.RECORD, record=AirbyteRecordMessage(stream=stream, data=data, emitted_at=1234)) def slice_message() -> AirbyteMessage: - return AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message='slice:{"key": "value"}')) - + return AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=Level.INFO, message='slice:{"key": "value"}')) From a7911f511b4b4919e5af68deccdd584e5a2ac862 Mon Sep 17 00:00:00 2001 From: Catherine Noll Date: Thu, 9 Mar 2023 23:56:38 +0000 Subject: [PATCH 33/71] CR improvements --- .../connector_builder_handler.py | 21 +++++++++++-------- .../source_declarative_manifest/README.md | 2 +- .../source_declarative_manifest/main.py | 9 ++++---- .../test_connector_builder_handler.py | 3 ++- 4 files changed, 19 insertions(+), 16 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index a1d8f6a6f647..a3d3c65a8641 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -3,26 +3,29 @@ # from datetime import datetime -from typing import Union -from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage +from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage, Type +from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from airbyte_cdk.utils.traced_exception import AirbyteTracedException -def list_streams() -> AirbyteRecordMessage: +def list_streams() -> AirbyteMessage: raise NotImplementedError -def stream_read() -> AirbyteRecordMessage: +def stream_read() -> AirbyteMessage: raise NotImplementedError -def resolve_manifest(source) -> Union[AirbyteMessage, AirbyteRecordMessage]: +def resolve_manifest(source: ManifestDeclarativeSource) -> AirbyteMessage: try: - return AirbyteRecordMessage( - data={"manifest": source.resolved_manifest}, - emitted_at=_emitted_at(), - stream="", + return AirbyteMessage( + type=Type.RECORD, + record=AirbyteRecordMessage( + data={"manifest": source.resolved_manifest}, + emitted_at=_emitted_at(), + stream="resolve_manifest", + ), ) except Exception as exc: error = AirbyteTracedException.from_exception(exc, message="Error resolving manifest.") diff --git a/airbyte-cdk/python/source_declarative_manifest/README.md b/airbyte-cdk/python/source_declarative_manifest/README.md index 665e03e8fda1..0ed9a0443476 100644 --- a/airbyte-cdk/python/source_declarative_manifest/README.md +++ b/airbyte-cdk/python/source_declarative_manifest/README.md @@ -7,7 +7,7 @@ This entrypoint is used for connectors created by the connector builder. These c The spec operation is not supported because the config is not known when running a spec. -This entrypoint is also the entrypoint for requests from the [Connector Builder](https://docs.airbyte.com/connector-development/config-based/connector-builder-ui/) Server. In addition to the `__injected_declarative_manifest`, the Connector Builder backend config requires the `__command` key, whose value is one of the commands handled by the ConnectorBuilderHandler (`stream_read`, `list_streams`, or `resolve_manifest`). +This entrypoint is also the entrypoint for requests from the [Connector Builder](https://docs.airbyte.com/connector-development/config-based/connector-builder-ui/) Server. In addition to the `__injected_declarative_manifest`, the [Connector Builder backend](https://github.com/airbytehq/airbyte/blob/master/airbyte-cdk/python/connector_builder/README.md) config requires the `__command` key, whose value is one of the commands handled by the ConnectorBuilderHandler (`stream_read`, `list_streams`, or `resolve_manifest`). ## Local development diff --git a/airbyte-cdk/python/source_declarative_manifest/main.py b/airbyte-cdk/python/source_declarative_manifest/main.py index 2d66c822477d..369743eda454 100644 --- a/airbyte-cdk/python/source_declarative_manifest/main.py +++ b/airbyte-cdk/python/source_declarative_manifest/main.py @@ -9,12 +9,11 @@ from airbyte_cdk.connector import BaseConnector from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch -from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from connector_builder import connector_builder_handler -def create_source(config: Mapping[str, Any]) -> DeclarativeSource: +def create_source(config: Mapping[str, Any]) -> ManifestDeclarativeSource: manifest = config.get("__injected_declarative_manifest") return ManifestDeclarativeSource(manifest) @@ -42,14 +41,14 @@ def preparse(args: List[str]) -> Tuple[str, str]: return parsed.command, parsed.config -def handle_connector_builder_request(source: DeclarativeSource, config: Mapping[str, Any]): +def handle_connector_builder_request(source: ManifestDeclarativeSource, config: Mapping[str, Any]): command = config.get("__command") if command == "resolve_manifest": return connector_builder_handler.resolve_manifest(source) raise ValueError(f"Unrecognized command {command}.") -def handle_connector_request(source: DeclarativeSource, args: List[str]): +def handle_connector_request(source: ManifestDeclarativeSource, args: List[str]): # Verify that the correct args are present for the production codepaths. AirbyteEntrypoint.parse_args(args) launch(source, sys.argv[1:]) @@ -59,7 +58,7 @@ def handle_request(args: List[str]): config = get_config_from_args(args) source = create_source(config) if "__command" in config: - handle_connector_builder_request(source, config) + print(handle_connector_builder_request(source, config)) else: handle_connector_request(source, args) diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py index 26e652df0faf..a0ca267dd45f 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py @@ -164,7 +164,8 @@ def test_resolve_manifest(): ], "check": {"type": "CheckStream", "stream_names": ["lists"]}, } - assert resolved_manifest.data["manifest"] == expected_resolved_manifest + assert resolved_manifest.record.data["manifest"] == expected_resolved_manifest + assert resolved_manifest.record.stream == "resolve_manifest" def test_resolve_manifest_error_returns_error_response(): From 59fea9522496a65912249f99ff311ea46d06ecaf Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Fri, 10 Mar 2023 14:58:41 -0800 Subject: [PATCH 34/71] fix test_source_declarative_manifest --- .../python/airbyte_cdk/sources/source.py | 5 +- .../connector_builder_handler.py | 27 +++++++-- .../connector_builder/message_grouper.py | 7 +-- .../source_declarative_manifest/main.py | 42 +++++++------- .../test_source_declarative_manifest.py | 58 ++++++++++++------- 5 files changed, 85 insertions(+), 54 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/source.py b/airbyte-cdk/python/airbyte_cdk/sources/source.py index c02728c5aeb3..f6c374140f28 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/source.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/source.py @@ -85,5 +85,6 @@ def _emit_legacy_state_format(self, state_obj) -> Union[List[AirbyteStateMessage return [] # can be overridden to change an input catalog - def read_catalog(self, catalog_path: str) -> ConfiguredAirbyteCatalog: - return ConfiguredAirbyteCatalog.parse_obj(self._read_json_file(catalog_path)) + @classmethod + def read_catalog(cls, catalog_path: str) -> ConfiguredAirbyteCatalog: + return ConfiguredAirbyteCatalog.parse_obj(cls._read_json_file(catalog_path)) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index 2820b795e238..6612d9f56563 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -6,7 +6,7 @@ from datetime import datetime from typing import Any, Mapping -from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage, Type +from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage, Type, ConfiguredAirbyteCatalog from airbyte_cdk.models import Type as MessageType from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource @@ -14,18 +14,20 @@ from connector_builder.message_grouper import MessageGrouper + + + def list_streams() -> AirbyteMessage: raise NotImplementedError -def read_stream(source: DeclarativeSource, config: Mapping[str, Any]) -> AirbyteMessage: - command_config = config.get("__command_config") - stream_name = command_config["stream_name"] +def read_stream(source: DeclarativeSource, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog) -> AirbyteMessage: + command_config = config.get("__test_read_config") max_pages_per_slice = command_config["max_pages_per_slice"] max_slices = command_config["max_slices"] max_records = command_config["max_records"] handler = MessageGrouper(max_pages_per_slice, max_slices) - stream_read = handler.get_message_groups(source, config, stream_name, max_records) + stream_read = handler.get_message_groups(source, config, configured_catalog, max_records) return AirbyteMessage(type=MessageType.RECORD, record=AirbyteRecordMessage( data=dataclasses.asdict(stream_read), stream="_test_read", @@ -47,6 +49,21 @@ def resolve_manifest(source: ManifestDeclarativeSource) -> AirbyteMessage: error = AirbyteTracedException.from_exception(exc, message="Error resolving manifest.") return error.as_airbyte_message() +CONNECTOR_BUILDER_STREAM_TO_FUNCTIONS = { + "resolve_manifest": resolve_manifest +} + +def is_connector_builder_request(config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog): + # TODO handle test read + stream_names = set([s.stream.name for s in configured_catalog.streams]) + if any([s in CONNECTOR_BUILDER_STREAM_TO_FUNCTIONS for s in stream_names]) or "__test_read_config" in config: + if len(stream_names) != 1: + raise ValueError(f"Only reading from a single stream is supported. Got: {stream_names}") + return True + else: + return False + def _emitted_at(): return int(datetime.now().timestamp()) * 1000 + diff --git a/airbyte-cdk/python/connector_builder/message_grouper.py b/airbyte-cdk/python/connector_builder/message_grouper.py index c9ba17082263..1f09d87a32d9 100644 --- a/airbyte-cdk/python/connector_builder/message_grouper.py +++ b/airbyte-cdk/python/connector_builder/message_grouper.py @@ -28,7 +28,7 @@ def __init__(self, max_pages_per_slice: int, max_slices: int, max_record_limit: def get_message_groups(self, source: DeclarativeSource, config: Mapping[str, Any], - stream: str, + configured_catalog: ConfiguredAirbyteCatalog, record_limit: Optional[int] = None, ) -> StreamRead: if record_limit is not None and not (1 <= record_limit <= 1000): @@ -43,9 +43,8 @@ def get_message_groups(self, slices = [] log_messages = [] state = {} # No support for incremental sync - catalog = MessageGrouper._create_configure_catalog(stream) for message_group in self._get_message_groups( - source.read(self.logger, config, catalog, state), + source.read(self.logger, config, configured_catalog, state), schema_inferrer, record_limit, ): @@ -58,7 +57,7 @@ def get_message_groups(self, logs=log_messages, slices=slices, test_read_limit_reached=self._has_reached_limit(slices), - inferred_schema=schema_inferrer.get_stream_schema(stream) + inferred_schema=schema_inferrer.get_stream_schema(configured_catalog.streams[0].stream.name) # The connector builder currently only supports reading from a single stream at a time ) def _get_message_groups( diff --git a/airbyte-cdk/python/source_declarative_manifest/main.py b/airbyte-cdk/python/source_declarative_manifest/main.py index cf6fc5e5c147..79951e587886 100644 --- a/airbyte-cdk/python/source_declarative_manifest/main.py +++ b/airbyte-cdk/python/source_declarative_manifest/main.py @@ -3,16 +3,18 @@ # -import argparse import sys -from typing import Any, List, Mapping, Tuple +from typing import Any, List, Mapping from airbyte_cdk.connector import BaseConnector from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch from airbyte_cdk.models import AirbyteMessage +from airbyte_cdk.models import ConfiguredAirbyteCatalog from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource +from airbyte_cdk.sources.source import Source from connector_builder import connector_builder_handler + def create_source(config: Mapping[str, Any], debug: bool) -> ManifestDeclarativeSource: manifest = config.get("__injected_declarative_manifest") return ManifestDeclarativeSource(manifest, debug) @@ -33,40 +35,36 @@ def get_config_from_args(args: List[str]) -> Mapping[str, Any]: return config -def preparse(args: List[str]) -> Tuple[str, str]: - parser = argparse.ArgumentParser() - parser.add_argument("command", type=str, help="Airbyte Protocol command") - parser.add_argument("--config", type=str, required=True, help="path to the json configuration file") - parsed, _ = parser.parse_known_args(args) - return parsed.command, parsed.config +def execute_command(source: ManifestDeclarativeSource, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog) -> AirbyteMessage: -def execute_command(source: ManifestDeclarativeSource, config: Mapping[str, Any]) -> AirbyteMessage: - command = config.get("__command") + command = configured_catalog.streams[0].stream.name if command == "resolve_manifest": return connector_builder_handler.resolve_manifest(source) - elif command == "read": - return connector_builder_handler.read_stream(source, config) - raise ValueError(f"Unrecognized command {command}.") + else: + return connector_builder_handler.read_stream(source, config, configured_catalog) -def handle_connector_builder_request(source: ManifestDeclarativeSource, config: Mapping[str, Any]): - message = execute_command(source, config) +def handle_connector_builder_request(source: ManifestDeclarativeSource, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog): + message = execute_command(source, config, configured_catalog) print(message.json(exclude_unset=True)) def handle_connector_request(source: ManifestDeclarativeSource, args: List[str]): # Verify that the correct args are present for the production codepaths. - AirbyteEntrypoint.parse_args(args) launch(source, sys.argv[1:]) - def handle_request(args: List[str]): - config = get_config_from_args(args) - is_connector_builder_request = "__command" in config - source = create_source(config, is_connector_builder_request) - if is_connector_builder_request: - handle_connector_builder_request(source, config) + #FIXME: need to make sure the manifest is passed in the config too! + parser = AirbyteEntrypoint.parse_args(args) + config_path = parser.config + catalog_path = parser.catalog + config = BaseConnector.read_config(config_path) + catalog = Source.read_catalog(catalog_path) + is_builder_request = connector_builder_handler.is_connector_builder_request(config, catalog) + source = create_source(config, is_builder_request) + if is_builder_request: + handle_connector_builder_request(source, config, catalog) else: handle_connector_request(source, args) diff --git a/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py b/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py index 7b6d75f42f28..6502cdcadbcd 100644 --- a/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py +++ b/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py @@ -4,12 +4,15 @@ import copy import json +from typing import Mapping, Any from unittest import mock import pytest import source_declarative_manifest +from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage, Type, ConfiguredAirbyteCatalog from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from source_declarative_manifest.main import handle_connector_builder_request, handle_request +import argparse CONFIG = { "__injected_declarative_manifest": { @@ -43,8 +46,6 @@ } } -CATALOG = {} - @pytest.fixture def valid_config_file(tmp_path): @@ -53,7 +54,7 @@ def valid_config_file(tmp_path): @pytest.fixture def catalog_file(tmp_path): - return _write_to_tmp_path(tmp_path, CATALOG, "catalog") + return _write_to_tmp_path(tmp_path, _create_configure_catalog_dict("my_stream"), "catalog") @pytest.fixture @@ -66,7 +67,7 @@ def config_file_without_injection(tmp_path): @pytest.fixture def config_file_with_command(tmp_path): config = copy.deepcopy(CONFIG) - config["__command"] = "command" + config["__test_read_config"] = {"max_records": 10} return _write_to_tmp_path(tmp_path, config, "config") @@ -77,21 +78,21 @@ def _write_to_tmp_path(tmp_path, config, filename): def test_on_spec_command_then_raise_value_error(valid_config_file): - with pytest.raises(ValueError): + with pytest.raises(SystemExit): handle_request(["spec", "--config", str(valid_config_file)]) @pytest.mark.parametrize( - "command", + "command, expected_exception_type", [ - pytest.param("check", id="test_check_command_error"), - pytest.param("discover", id="test_discover_command_error"), - pytest.param("read", id="test_read_command_error"), - pytest.param("asdf", id="test_arbitrary_command_error"), + pytest.param("check", AttributeError, id="test_check_command_error"), + pytest.param("discover", AttributeError, id="test_discover_command_error"), + pytest.param("read", SystemExit, id="test_read_command_error"), + pytest.param("asdf", SystemExit, id="test_arbitrary_command_error") ], ) -def test_given_no_injected_declarative_manifest_then_raise_value_error(command, config_file_without_injection): - with pytest.raises(ValueError): +def test_given_no_injected_declarative_manifest_then_raise_error(command, expected_exception_type, config_file_without_injection): + with pytest.raises(expected_exception_type): handle_request([command, "--config", str(config_file_without_injection)]) @@ -114,17 +115,18 @@ def test_missing_config_raises_value_error(command): [ pytest.param("check", id="test_check_command"), pytest.param("discover", id="test_discover_command"), - pytest.param("read", id="test_read_command"), + #pytest.param("read", id="test_read_command"), FIXME: this should be in a separate test ], ) def test_given_injected_declarative_manifest_then_launch_with_declarative_manifest(command, valid_config_file, catalog_file): with mock.patch("source_declarative_manifest.main.launch") as patch: if command == "read": handle_request([command, "--config", str(valid_config_file), "--catalog", str(catalog_file)]) + source, _ = patch.call_args[0] + assert isinstance(source, ManifestDeclarativeSource) else: - handle_request([command, "--config", str(valid_config_file)]) - source, _ = patch.call_args[0] - assert isinstance(source, ManifestDeclarativeSource) + with pytest.raises(AttributeError): + handle_request([command, "--config", str(valid_config_file)]) def test_given_injected_declarative_manifest_then_launch_with_declarative_manifest_missing_arg(valid_config_file): @@ -132,9 +134,9 @@ def test_given_injected_declarative_manifest_then_launch_with_declarative_manife handle_request(["read", "--config", str(valid_config_file)]) -def test_given_command_then_use_connector_builder_handler(config_file_with_command): +def test_given_command_then_use_connector_builder_handler(config_file_with_command, catalog_file): with mock.patch.object(source_declarative_manifest.main, "handle_connector_builder_request") as patch: - handle_request(["read", "--config", str(config_file_with_command)]) + handle_request(["read", "--config", str(config_file_with_command), "--catalog", str(catalog_file)]) assert patch.call_count == 1 @@ -148,7 +150,21 @@ def test_given_command_then_use_connector_builder_handler(config_file_with_comma ) def test_invalid_command(command): config = copy.deepcopy(CONFIG) - config["__command"] = command source = ManifestDeclarativeSource(CONFIG["__injected_declarative_manifest"]) - with pytest.raises(ValueError): - handle_connector_builder_request(source, config) + with pytest.raises(Exception): + handle_connector_builder_request(source, config, ConfiguredAirbyteCatalog.parse_obj(_create_configure_catalog_dict(command))) + +def _create_configure_catalog_dict(stream_name: str) -> Mapping[str, Any]: + return { + "streams": [ + { + "stream": { + "name": stream_name, + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"], + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + } + ] + } From b190176192878603ea79c218bad5b6171e6ba8cf Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Fri, 10 Mar 2023 15:03:39 -0800 Subject: [PATCH 35/71] fix tests --- .../connector_builder/test_message_grouper.py | 52 ++++++++++++++----- .../test_source_declarative_manifest.py | 4 +- 2 files changed, 41 insertions(+), 15 deletions(-) diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_message_grouper.py b/airbyte-cdk/python/unit_tests/connector_builder/test_message_grouper.py index 8a7989efafc3..ac887e217fd6 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_message_grouper.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_message_grouper.py @@ -1,16 +1,17 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +import json from typing import Iterator from unittest.mock import MagicMock import pytest -from airbyte_cdk.models import Level, AirbyteMessage, AirbyteRecordMessage, AirbyteLogMessage +from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage, ConfiguredAirbyteCatalog +from airbyte_cdk.models import Level, AirbyteLogMessage +from airbyte_cdk.models import Type as MessageType from connector_builder.message_grouper import MessageGrouper from connector_builder.models import StreamReadPages, HttpRequest, HttpResponse, StreamRead -from airbyte_cdk.models import Type as MessageType -import json MAX_PAGES_PER_SLICE = 4 MAX_SLICES = 3 @@ -123,7 +124,8 @@ def test_get_grouped_messages(): ) connector_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) - actual_response: StreamRead = connector_builder_handler.get_message_groups(source=mock_source, config=CONFIG, stream="hashiras") + actual_response: StreamRead = connector_builder_handler.get_message_groups(source=mock_source, config=CONFIG, + configured_catalog=_create_configured_catalog("hashiras")) assert actual_response.inferred_schema == expected_schema single_slice = actual_response.slices[0] @@ -185,7 +187,8 @@ def test_get_grouped_messages_with_logs(): connector_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) - actual_response: StreamRead = connector_builder_handler.get_message_groups(source=mock_source, config=CONFIG, stream="hashiras") + actual_response: StreamRead = connector_builder_handler.get_message_groups(source=mock_source, config=CONFIG, + configured_catalog=_create_configured_catalog("hashiras")) single_slice = actual_response.slices[0] for i, actual_page in enumerate(single_slice.pages): assert actual_page == expected_pages[i] @@ -226,7 +229,9 @@ def test_get_grouped_messages_record_limit(request_record_limit, max_record_limi record_limit = min(request_record_limit, max_record_limit) api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES, max_record_limit=max_record_limit) - actual_response: StreamRead = api.get_message_groups(mock_source, config=CONFIG, stream="hashiras", record_limit=request_record_limit) + actual_response: StreamRead = api.get_message_groups(mock_source, config=CONFIG, + configured_catalog=_create_configured_catalog("hashiras"), + record_limit=request_record_limit) single_slice = actual_response.slices[0] total_records = 0 for i, actual_page in enumerate(single_slice.pages): @@ -265,7 +270,8 @@ def test_get_grouped_messages_default_record_limit(max_record_limit): n_records = 2 api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES, max_record_limit=max_record_limit) - actual_response: StreamRead = api.get_message_groups(source=mock_source, config=CONFIG, stream="hashiras") + actual_response: StreamRead = api.get_message_groups(source=mock_source, config=CONFIG, + configured_catalog=_create_configured_catalog("hashiras")) single_slice = actual_response.slices[0] total_records = 0 for i, actual_page in enumerate(single_slice.pages): @@ -297,7 +303,7 @@ def test_get_grouped_messages_limit_0(): api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) with pytest.raises(ValueError): - api.get_message_groups(source=mock_source, config=CONFIG, stream="hashiras", record_limit=0) + api.get_message_groups(source=mock_source, config=CONFIG, configured_catalog=_create_configured_catalog("hashiras"), record_limit=0) def test_get_grouped_messages_no_records(): @@ -346,7 +352,8 @@ def test_get_grouped_messages_no_records(): message_grouper = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) - actual_response: StreamRead = message_grouper.get_message_groups(source=mock_source, config=CONFIG, stream="hashiras") + actual_response: StreamRead = message_grouper.get_message_groups(source=mock_source, config=CONFIG, + configured_catalog=_create_configured_catalog("hashiras")) single_slice = actual_response.slices[0] for i, actual_page in enumerate(single_slice.pages): @@ -369,7 +376,7 @@ def test_get_grouped_messages_invalid_group_format(): api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) with pytest.raises(ValueError) as actual_exception: - api.get_message_groups(source=mock_source, config=CONFIG, stream="hashiras") + api.get_message_groups(source=mock_source, config=CONFIG, configured_catalog=_create_configured_catalog("hashiras")) @pytest.mark.parametrize( @@ -452,7 +459,8 @@ def test_get_grouped_messages_with_many_slices(): connecto_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) - stream_read: StreamRead = connecto_builder_handler.get_message_groups(source=mock_source, config=CONFIG, stream="hashiras") + stream_read: StreamRead = connecto_builder_handler.get_message_groups(source=mock_source, config=CONFIG, + configured_catalog=_create_configured_catalog("hashiras")) assert not stream_read.test_read_limit_reached assert len(stream_read.slices) == 2 @@ -476,7 +484,8 @@ def test_get_grouped_messages_given_maximum_number_of_slices_then_test_read_limi api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) - stream_read: StreamRead = api.get_message_groups(source=mock_source, config=CONFIG, stream="hashiras") + stream_read: StreamRead = api.get_message_groups(source=mock_source, config=CONFIG, + configured_catalog=_create_configured_catalog("hashiras")) assert stream_read.test_read_limit_reached @@ -491,7 +500,8 @@ def test_get_grouped_messages_given_maximum_number_of_pages_then_test_read_limit api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) - stream_read: StreamRead = api.get_message_groups(source=mock_source, config=CONFIG, stream="hashiras") + stream_read: StreamRead = api.get_message_groups(source=mock_source, config=CONFIG, + configured_catalog=_create_configured_catalog("hashiras")) assert stream_read.test_read_limit_reached @@ -516,3 +526,19 @@ def record_message(stream: str, data: dict) -> AirbyteMessage: def slice_message() -> AirbyteMessage: return AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=Level.INFO, message='slice:{"key": "value"}')) + + +def _create_configured_catalog(stream_name: str) -> ConfiguredAirbyteCatalog: + return ConfiguredAirbyteCatalog.parse_obj({ + "streams": [ + { + "stream": { + "name": stream_name, + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"], + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + } + ] + }) diff --git a/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py b/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py index 6502cdcadbcd..2c3df082abac 100644 --- a/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py +++ b/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py @@ -8,11 +8,11 @@ from unittest import mock import pytest + import source_declarative_manifest -from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage, Type, ConfiguredAirbyteCatalog +from airbyte_cdk.models import ConfiguredAirbyteCatalog from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from source_declarative_manifest.main import handle_connector_builder_request, handle_request -import argparse CONFIG = { "__injected_declarative_manifest": { From 8c51bb1ab8f42df3422501c04df948defee02845 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Fri, 10 Mar 2023 15:07:09 -0800 Subject: [PATCH 36/71] Update --- .../connector_builder/connector_builder_handler.py | 14 ++++++-------- .../python/source_declarative_manifest/main.py | 11 ++++++----- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index 6612d9f56563..3f5166b6c8d7 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -14,9 +14,6 @@ from connector_builder.message_grouper import MessageGrouper - - - def list_streams() -> AirbyteMessage: raise NotImplementedError @@ -49,14 +46,16 @@ def resolve_manifest(source: ManifestDeclarativeSource) -> AirbyteMessage: error = AirbyteTracedException.from_exception(exc, message="Error resolving manifest.") return error.as_airbyte_message() -CONNECTOR_BUILDER_STREAM_TO_FUNCTIONS = { - "resolve_manifest": resolve_manifest + +CONNECTOR_BUILDER_STREAMS= { + "resolve_manifest", + "list_streams" } + def is_connector_builder_request(config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog): - # TODO handle test read stream_names = set([s.stream.name for s in configured_catalog.streams]) - if any([s in CONNECTOR_BUILDER_STREAM_TO_FUNCTIONS for s in stream_names]) or "__test_read_config" in config: + if any([s in CONNECTOR_BUILDER_STREAMS for s in stream_names]) or "__test_read_config" in config: if len(stream_names) != 1: raise ValueError(f"Only reading from a single stream is supported. Got: {stream_names}") return True @@ -66,4 +65,3 @@ def is_connector_builder_request(config: Mapping[str, Any], configured_catalog: def _emitted_at(): return int(datetime.now().timestamp()) * 1000 - diff --git a/airbyte-cdk/python/source_declarative_manifest/main.py b/airbyte-cdk/python/source_declarative_manifest/main.py index 79951e587886..d358f40462df 100644 --- a/airbyte-cdk/python/source_declarative_manifest/main.py +++ b/airbyte-cdk/python/source_declarative_manifest/main.py @@ -35,9 +35,8 @@ def get_config_from_args(args: List[str]) -> Mapping[str, Any]: return config - -def execute_command(source: ManifestDeclarativeSource, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog) -> AirbyteMessage: - +def execute_command(source: ManifestDeclarativeSource, config: Mapping[str, Any], + configured_catalog: ConfiguredAirbyteCatalog) -> AirbyteMessage: command = configured_catalog.streams[0].stream.name if command == "resolve_manifest": return connector_builder_handler.resolve_manifest(source) @@ -45,7 +44,8 @@ def execute_command(source: ManifestDeclarativeSource, config: Mapping[str, Any] return connector_builder_handler.read_stream(source, config, configured_catalog) -def handle_connector_builder_request(source: ManifestDeclarativeSource, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog): +def handle_connector_builder_request(source: ManifestDeclarativeSource, config: Mapping[str, Any], + configured_catalog: ConfiguredAirbyteCatalog): message = execute_command(source, config, configured_catalog) print(message.json(exclude_unset=True)) @@ -54,8 +54,9 @@ def handle_connector_request(source: ManifestDeclarativeSource, args: List[str]) # Verify that the correct args are present for the production codepaths. launch(source, sys.argv[1:]) + def handle_request(args: List[str]): - #FIXME: need to make sure the manifest is passed in the config too! + # FIXME: need to make sure the manifest is passed in the config too! parser = AirbyteEntrypoint.parse_args(args) config_path = parser.config catalog_path = parser.catalog From ad3a9c4d6c1844ab5d1ce999e2cb2deb3eb910c1 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Fri, 10 Mar 2023 15:12:26 -0800 Subject: [PATCH 37/71] Update --- .../connector_builder_handler.py | 2 +- .../python/source_declarative_manifest/main.py | 18 +++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index 3f5166b6c8d7..42bfa090eb87 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -47,7 +47,7 @@ def resolve_manifest(source: ManifestDeclarativeSource) -> AirbyteMessage: return error.as_airbyte_message() -CONNECTOR_BUILDER_STREAMS= { +CONNECTOR_BUILDER_STREAMS = { "resolve_manifest", "list_streams" } diff --git a/airbyte-cdk/python/source_declarative_manifest/main.py b/airbyte-cdk/python/source_declarative_manifest/main.py index d358f40462df..89bc6b257d84 100644 --- a/airbyte-cdk/python/source_declarative_manifest/main.py +++ b/airbyte-cdk/python/source_declarative_manifest/main.py @@ -56,17 +56,21 @@ def handle_connector_request(source: ManifestDeclarativeSource, args: List[str]) def handle_request(args: List[str]): - # FIXME: need to make sure the manifest is passed in the config too! parser = AirbyteEntrypoint.parse_args(args) + command = parser.command config_path = parser.config - catalog_path = parser.catalog config = BaseConnector.read_config(config_path) - catalog = Source.read_catalog(catalog_path) - is_builder_request = connector_builder_handler.is_connector_builder_request(config, catalog) - source = create_source(config, is_builder_request) - if is_builder_request: - handle_connector_builder_request(source, config, catalog) + if command == "read": + catalog_path = parser.catalog + catalog = Source.read_catalog(catalog_path) + is_builder_request = connector_builder_handler.is_connector_builder_request(config, catalog) + source = create_source(config, is_builder_request) + if is_builder_request: + handle_connector_builder_request(source, config, catalog) + else: + handle_connector_request(source, args) else: + source = create_source(config, False) handle_connector_request(source, args) From e1e25983dd62a406fb4f7ca7e173be29cc9bc3b1 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Fri, 10 Mar 2023 15:37:09 -0800 Subject: [PATCH 38/71] Update --- .../source_declarative_manifest/main.py | 18 ++------ .../test_connector_builder_handler.py | 24 ++++++++++- .../connector_builder/test_message_grouper.py | 39 ++++++----------- .../unit_tests/connector_builder/utils.py | 22 ++++++++++ .../test_source_declarative_manifest.py | 43 +++++++------------ 5 files changed, 76 insertions(+), 70 deletions(-) create mode 100644 airbyte-cdk/python/unit_tests/connector_builder/utils.py diff --git a/airbyte-cdk/python/source_declarative_manifest/main.py b/airbyte-cdk/python/source_declarative_manifest/main.py index 89bc6b257d84..7365b4c73409 100644 --- a/airbyte-cdk/python/source_declarative_manifest/main.py +++ b/airbyte-cdk/python/source_declarative_manifest/main.py @@ -20,20 +20,6 @@ def create_source(config: Mapping[str, Any], debug: bool) -> ManifestDeclarative return ManifestDeclarativeSource(manifest, debug) -def get_config_from_args(args: List[str]) -> Mapping[str, Any]: - command, config_filepath = preparse(args) - if command == "spec": - raise ValueError("spec command is not supported for injected declarative manifest") - - config = BaseConnector.read_config(config_filepath) - - if "__injected_declarative_manifest" not in config: - raise ValueError( - f"Invalid config: `__injected_declarative_manifest` should be provided at the root of the config but config only has keys {list(config.keys())}" - ) - - return config - def execute_command(source: ManifestDeclarativeSource, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog) -> AirbyteMessage: @@ -60,6 +46,10 @@ def handle_request(args: List[str]): command = parser.command config_path = parser.config config = BaseConnector.read_config(config_path) + if "__injected_declarative_manifest" not in config: + raise ValueError( + f"Invalid config: `__injected_declarative_manifest` should be provided at the root of the config but config only has keys {list(config.keys())}" + ) if command == "read": catalog_path = parser.catalog catalog = Source.read_catalog(catalog_path) diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py index a0ca267dd45f..7bdc96fbdf62 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py @@ -1,9 +1,12 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +import pytest from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource -from connector_builder.connector_builder_handler import resolve_manifest +from connector_builder.connector_builder_handler import resolve_manifest, is_connector_builder_request +from unit_tests.connector_builder.utils import create_configured_catalog + _stream_name = "stream_with_custom_requester" _stream_primary_key = "id" @@ -44,7 +47,13 @@ CONFIG = { "__injected_declarative_manifest": MANIFEST, - "__command": "resolve_manifest", +} + +TEST_READ_CONFIG = { + "__injected_declarative_manifest": MANIFEST, + "__test_read_config": { + "max_records": 10 + } } @@ -177,3 +186,14 @@ def resolved_manifest(self): source = MockManifestDeclarativeSource() response = resolve_manifest(source) assert "Error resolving manifest" in response.trace.error.message + +@pytest.mark.parametrize("test_name, config, configured_catalog, expected_result", + [ + ("test_resolve_manifest_is_connector_builder_request", CONFIG, create_configured_catalog("resolve_manifest"), True), + ("test_list_streams_is_connector_builder_request", CONFIG, create_configured_catalog("list_streams"), True), + ("test_regular_stream_is_not_connector_builder_request", CONFIG, create_configured_catalog("my_stream"), False), + ("test_regular_stream_with_test_read_config_is_connector_builder_request", TEST_READ_CONFIG, create_configured_catalog("my_stream"), True), + ]) +def test_is_connector_builder_request(test_name, config, configured_catalog, expected_result): + result = is_connector_builder_request(config, configured_catalog) + assert result == expected_result diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_message_grouper.py b/airbyte-cdk/python/unit_tests/connector_builder/test_message_grouper.py index ac887e217fd6..db2cf2dcf04f 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_message_grouper.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_message_grouper.py @@ -7,11 +7,12 @@ import pytest -from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage, ConfiguredAirbyteCatalog +from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage from airbyte_cdk.models import Level, AirbyteLogMessage from airbyte_cdk.models import Type as MessageType from connector_builder.message_grouper import MessageGrouper from connector_builder.models import StreamReadPages, HttpRequest, HttpResponse, StreamRead +from unit_tests.connector_builder.utils import create_configured_catalog MAX_PAGES_PER_SLICE = 4 MAX_SLICES = 3 @@ -125,7 +126,7 @@ def test_get_grouped_messages(): connector_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) actual_response: StreamRead = connector_builder_handler.get_message_groups(source=mock_source, config=CONFIG, - configured_catalog=_create_configured_catalog("hashiras")) + configured_catalog=create_configured_catalog("hashiras")) assert actual_response.inferred_schema == expected_schema single_slice = actual_response.slices[0] @@ -188,7 +189,7 @@ def test_get_grouped_messages_with_logs(): connector_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) actual_response: StreamRead = connector_builder_handler.get_message_groups(source=mock_source, config=CONFIG, - configured_catalog=_create_configured_catalog("hashiras")) + configured_catalog=create_configured_catalog("hashiras")) single_slice = actual_response.slices[0] for i, actual_page in enumerate(single_slice.pages): assert actual_page == expected_pages[i] @@ -230,7 +231,7 @@ def test_get_grouped_messages_record_limit(request_record_limit, max_record_limi api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES, max_record_limit=max_record_limit) actual_response: StreamRead = api.get_message_groups(mock_source, config=CONFIG, - configured_catalog=_create_configured_catalog("hashiras"), + configured_catalog=create_configured_catalog("hashiras"), record_limit=request_record_limit) single_slice = actual_response.slices[0] total_records = 0 @@ -271,7 +272,7 @@ def test_get_grouped_messages_default_record_limit(max_record_limit): api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES, max_record_limit=max_record_limit) actual_response: StreamRead = api.get_message_groups(source=mock_source, config=CONFIG, - configured_catalog=_create_configured_catalog("hashiras")) + configured_catalog=create_configured_catalog("hashiras")) single_slice = actual_response.slices[0] total_records = 0 for i, actual_page in enumerate(single_slice.pages): @@ -303,7 +304,7 @@ def test_get_grouped_messages_limit_0(): api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) with pytest.raises(ValueError): - api.get_message_groups(source=mock_source, config=CONFIG, configured_catalog=_create_configured_catalog("hashiras"), record_limit=0) + api.get_message_groups(source=mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras"), record_limit=0) def test_get_grouped_messages_no_records(): @@ -353,7 +354,7 @@ def test_get_grouped_messages_no_records(): message_grouper = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) actual_response: StreamRead = message_grouper.get_message_groups(source=mock_source, config=CONFIG, - configured_catalog=_create_configured_catalog("hashiras")) + configured_catalog=create_configured_catalog("hashiras")) single_slice = actual_response.slices[0] for i, actual_page in enumerate(single_slice.pages): @@ -376,7 +377,7 @@ def test_get_grouped_messages_invalid_group_format(): api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) with pytest.raises(ValueError) as actual_exception: - api.get_message_groups(source=mock_source, config=CONFIG, configured_catalog=_create_configured_catalog("hashiras")) + api.get_message_groups(source=mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras")) @pytest.mark.parametrize( @@ -460,7 +461,7 @@ def test_get_grouped_messages_with_many_slices(): connecto_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) stream_read: StreamRead = connecto_builder_handler.get_message_groups(source=mock_source, config=CONFIG, - configured_catalog=_create_configured_catalog("hashiras")) + configured_catalog=create_configured_catalog("hashiras")) assert not stream_read.test_read_limit_reached assert len(stream_read.slices) == 2 @@ -485,7 +486,7 @@ def test_get_grouped_messages_given_maximum_number_of_slices_then_test_read_limi api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) stream_read: StreamRead = api.get_message_groups(source=mock_source, config=CONFIG, - configured_catalog=_create_configured_catalog("hashiras")) + configured_catalog=create_configured_catalog("hashiras")) assert stream_read.test_read_limit_reached @@ -501,7 +502,7 @@ def test_get_grouped_messages_given_maximum_number_of_pages_then_test_read_limit api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) stream_read: StreamRead = api.get_message_groups(source=mock_source, config=CONFIG, - configured_catalog=_create_configured_catalog("hashiras")) + configured_catalog=create_configured_catalog("hashiras")) assert stream_read.test_read_limit_reached @@ -526,19 +527,3 @@ def record_message(stream: str, data: dict) -> AirbyteMessage: def slice_message() -> AirbyteMessage: return AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=Level.INFO, message='slice:{"key": "value"}')) - - -def _create_configured_catalog(stream_name: str) -> ConfiguredAirbyteCatalog: - return ConfiguredAirbyteCatalog.parse_obj({ - "streams": [ - { - "stream": { - "name": stream_name, - "json_schema": {}, - "supported_sync_modes": ["full_refresh", "incremental"], - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite", - } - ] - }) diff --git a/airbyte-cdk/python/unit_tests/connector_builder/utils.py b/airbyte-cdk/python/unit_tests/connector_builder/utils.py new file mode 100644 index 000000000000..9566d203901f --- /dev/null +++ b/airbyte-cdk/python/unit_tests/connector_builder/utils.py @@ -0,0 +1,22 @@ +from typing import Any, Mapping +from airbyte_cdk.models.airbyte_protocol import ConfiguredAirbyteCatalog + + +def create_configured_catalog_dict(stream_name: str) -> Mapping[str, Any]: + return { + "streams": [ + { + "stream": { + "name": stream_name, + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"], + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + } + ] + } + + +def create_configured_catalog(stream_name: str) -> ConfiguredAirbyteCatalog: + return ConfiguredAirbyteCatalog.parse_obj(create_configured_catalog_dict(stream_name)) diff --git a/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py b/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py index 2c3df082abac..71fe294d3f14 100644 --- a/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py +++ b/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py @@ -13,6 +13,7 @@ from airbyte_cdk.models import ConfiguredAirbyteCatalog from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from source_declarative_manifest.main import handle_connector_builder_request, handle_request +from unit_tests.connector_builder.utils import create_configured_catalog_dict CONFIG = { "__injected_declarative_manifest": { @@ -54,7 +55,7 @@ def valid_config_file(tmp_path): @pytest.fixture def catalog_file(tmp_path): - return _write_to_tmp_path(tmp_path, _create_configure_catalog_dict("my_stream"), "catalog") + return _write_to_tmp_path(tmp_path, create_configured_catalog_dict("my_stream"), "catalog") @pytest.fixture @@ -85,15 +86,18 @@ def test_on_spec_command_then_raise_value_error(valid_config_file): @pytest.mark.parametrize( "command, expected_exception_type", [ - pytest.param("check", AttributeError, id="test_check_command_error"), - pytest.param("discover", AttributeError, id="test_discover_command_error"), - pytest.param("read", SystemExit, id="test_read_command_error"), + pytest.param("check", ValueError, id="test_check_command_error"), + pytest.param("discover", ValueError, id="test_discover_command_error"), + pytest.param("read", ValueError, id="test_read_command_error"), pytest.param("asdf", SystemExit, id="test_arbitrary_command_error") ], ) -def test_given_no_injected_declarative_manifest_then_raise_error(command, expected_exception_type, config_file_without_injection): +def test_given_no_injected_declarative_manifest_then_raise_error(command, expected_exception_type, config_file_without_injection, catalog_file): with pytest.raises(expected_exception_type): - handle_request([command, "--config", str(config_file_without_injection)]) + if command == "read": + handle_request([command, "--config", str(config_file_without_injection), "--catalog", str(catalog_file)]) + else: + handle_request([command, "--config", str(config_file_without_injection)]) @pytest.mark.parametrize( @@ -115,18 +119,17 @@ def test_missing_config_raises_value_error(command): [ pytest.param("check", id="test_check_command"), pytest.param("discover", id="test_discover_command"), - #pytest.param("read", id="test_read_command"), FIXME: this should be in a separate test + pytest.param("read", id="test_read_command"), ], ) def test_given_injected_declarative_manifest_then_launch_with_declarative_manifest(command, valid_config_file, catalog_file): with mock.patch("source_declarative_manifest.main.launch") as patch: if command == "read": handle_request([command, "--config", str(valid_config_file), "--catalog", str(catalog_file)]) - source, _ = patch.call_args[0] - assert isinstance(source, ManifestDeclarativeSource) else: - with pytest.raises(AttributeError): - handle_request([command, "--config", str(valid_config_file)]) + handle_request([command, "--config", str(valid_config_file)]) + source, _ = patch.call_args[0] + assert isinstance(source, ManifestDeclarativeSource) def test_given_injected_declarative_manifest_then_launch_with_declarative_manifest_missing_arg(valid_config_file): @@ -152,19 +155,5 @@ def test_invalid_command(command): config = copy.deepcopy(CONFIG) source = ManifestDeclarativeSource(CONFIG["__injected_declarative_manifest"]) with pytest.raises(Exception): - handle_connector_builder_request(source, config, ConfiguredAirbyteCatalog.parse_obj(_create_configure_catalog_dict(command))) - -def _create_configure_catalog_dict(stream_name: str) -> Mapping[str, Any]: - return { - "streams": [ - { - "stream": { - "name": stream_name, - "json_schema": {}, - "supported_sync_modes": ["full_refresh", "incremental"], - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite", - } - ] - } + handle_connector_builder_request(source, config, ConfiguredAirbyteCatalog.parse_obj(create_configured_catalog_dict(command))) + From 639accdbacaa844aa7bc9a7e144e7601421f6a10 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Fri, 10 Mar 2023 16:03:02 -0800 Subject: [PATCH 39/71] Update --- .../connector_builder_handler.py | 12 ++++++-- .../source_declarative_manifest/main.py | 20 ++----------- .../test_connector_builder_handler.py | 28 ++++++++++++++----- .../test_source_declarative_manifest.py | 22 ++------------- 4 files changed, 35 insertions(+), 47 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index 42bfa090eb87..6975861de950 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -12,6 +12,7 @@ from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from airbyte_cdk.utils.traced_exception import AirbyteTracedException from connector_builder.message_grouper import MessageGrouper +from functools import partial def list_streams() -> AirbyteMessage: @@ -58,9 +59,14 @@ def is_connector_builder_request(config: Mapping[str, Any], configured_catalog: if any([s in CONNECTOR_BUILDER_STREAMS for s in stream_names]) or "__test_read_config" in config: if len(stream_names) != 1: raise ValueError(f"Only reading from a single stream is supported. Got: {stream_names}") - return True - else: - return False + command = next(iter(stream_names)) # Connector builder only supports reading from a single stream + if command == "resolve_manifest": + return resolve_manifest + elif command == "list_streams": + return list_streams + else: + return partial(read_stream, config=config, configured_catalog=configured_catalog) + return None def _emitted_at(): diff --git a/airbyte-cdk/python/source_declarative_manifest/main.py b/airbyte-cdk/python/source_declarative_manifest/main.py index 7365b4c73409..738163d808dc 100644 --- a/airbyte-cdk/python/source_declarative_manifest/main.py +++ b/airbyte-cdk/python/source_declarative_manifest/main.py @@ -20,25 +20,9 @@ def create_source(config: Mapping[str, Any], debug: bool) -> ManifestDeclarative return ManifestDeclarativeSource(manifest, debug) - -def execute_command(source: ManifestDeclarativeSource, config: Mapping[str, Any], - configured_catalog: ConfiguredAirbyteCatalog) -> AirbyteMessage: - command = configured_catalog.streams[0].stream.name - if command == "resolve_manifest": - return connector_builder_handler.resolve_manifest(source) - else: - return connector_builder_handler.read_stream(source, config, configured_catalog) - - -def handle_connector_builder_request(source: ManifestDeclarativeSource, config: Mapping[str, Any], - configured_catalog: ConfiguredAirbyteCatalog): - message = execute_command(source, config, configured_catalog) - print(message.json(exclude_unset=True)) - - def handle_connector_request(source: ManifestDeclarativeSource, args: List[str]): # Verify that the correct args are present for the production codepaths. - launch(source, sys.argv[1:]) + launch(source, args) def handle_request(args: List[str]): @@ -56,7 +40,7 @@ def handle_request(args: List[str]): is_builder_request = connector_builder_handler.is_connector_builder_request(config, catalog) source = create_source(config, is_builder_request) if is_builder_request: - handle_connector_builder_request(source, config, catalog) + print(is_builder_request(source)) else: handle_connector_request(source, args) else: diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py index 7bdc96fbdf62..c5a7556a620e 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py @@ -3,10 +3,11 @@ # import pytest +import connector_builder.connector_builder_handler from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from connector_builder.connector_builder_handler import resolve_manifest, is_connector_builder_request from unit_tests.connector_builder.utils import create_configured_catalog - +from functools import partial _stream_name = "stream_with_custom_requester" _stream_primary_key = "id" @@ -44,7 +45,6 @@ "check": {"type": "CheckStream", "stream_names": ["lists"]}, } - CONFIG = { "__injected_declarative_manifest": MANIFEST, } @@ -187,13 +187,27 @@ def resolved_manifest(self): response = resolve_manifest(source) assert "Error resolving manifest" in response.trace.error.message + @pytest.mark.parametrize("test_name, config, configured_catalog, expected_result", [ - ("test_resolve_manifest_is_connector_builder_request", CONFIG, create_configured_catalog("resolve_manifest"), True), - ("test_list_streams_is_connector_builder_request", CONFIG, create_configured_catalog("list_streams"), True), - ("test_regular_stream_is_not_connector_builder_request", CONFIG, create_configured_catalog("my_stream"), False), - ("test_regular_stream_with_test_read_config_is_connector_builder_request", TEST_READ_CONFIG, create_configured_catalog("my_stream"), True), + ("test_resolve_manifest_is_connector_builder_request", CONFIG, create_configured_catalog("resolve_manifest"), connector_builder.connector_builder_handler.resolve_manifest), + ("test_list_streams_is_connector_builder_request", CONFIG, create_configured_catalog("list_streams"), connector_builder.connector_builder_handler.list_streams), + ("test_regular_stream_is_not_connector_builder_request", CONFIG, create_configured_catalog("my_stream"), None), + ("test_regular_stream_with_test_read_config_is_connector_builder_request", TEST_READ_CONFIG, + create_configured_catalog("my_stream"), + partial(connector_builder.connector_builder_handler.read_stream, config=TEST_READ_CONFIG, + configured_catalog=create_configured_catalog("my_stream"))), ]) def test_is_connector_builder_request(test_name, config, configured_catalog, expected_result): result = is_connector_builder_request(config, configured_catalog) - assert result == expected_result + if isinstance(expected_result, partial): + assert partial_functions_equal(expected_result, result) + else: + assert result == expected_result + + +def partial_functions_equal(func1, func2): + if not (isinstance(func1, partial) and isinstance(func2, partial)): + return False + are_equal = all([getattr(func1, attr) == getattr(func2, attr) for attr in ['func', 'args', 'keywords']]) + return are_equal diff --git a/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py b/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py index 71fe294d3f14..a3cd55dd2301 100644 --- a/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py +++ b/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py @@ -10,9 +10,9 @@ import pytest import source_declarative_manifest -from airbyte_cdk.models import ConfiguredAirbyteCatalog +import connector_builder from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource -from source_declarative_manifest.main import handle_connector_builder_request, handle_request +from source_declarative_manifest.main import handle_request from unit_tests.connector_builder.utils import create_configured_catalog_dict CONFIG = { @@ -138,22 +138,6 @@ def test_given_injected_declarative_manifest_then_launch_with_declarative_manife def test_given_command_then_use_connector_builder_handler(config_file_with_command, catalog_file): - with mock.patch.object(source_declarative_manifest.main, "handle_connector_builder_request") as patch: + with mock.patch.object(connector_builder.connector_builder_handler, "is_connector_builder_request") as patch: handle_request(["read", "--config", str(config_file_with_command), "--catalog", str(catalog_file)]) assert patch.call_count == 1 - - -@pytest.mark.parametrize( - "command", - [ - pytest.param("asdf", id="test_arbitrary_command_error"), - pytest.param(None, id="test_command_is_none_error"), - pytest.param("", id="test_command_is_empty_error"), - ], -) -def test_invalid_command(command): - config = copy.deepcopy(CONFIG) - source = ManifestDeclarativeSource(CONFIG["__injected_declarative_manifest"]) - with pytest.raises(Exception): - handle_connector_builder_request(source, config, ConfiguredAirbyteCatalog.parse_obj(create_configured_catalog_dict(command))) - From 1fbbc8fbe7770b0700dbcedd677b949330662b04 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Fri, 10 Mar 2023 16:07:22 -0800 Subject: [PATCH 40/71] rename --- .../python/connector_builder/connector_builder_handler.py | 2 +- airbyte-cdk/python/source_declarative_manifest/main.py | 2 +- .../connector_builder/test_connector_builder_handler.py | 6 +++--- .../python/unit_tests/test_source_declarative_manifest.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index 6975861de950..b4d6ad8f9b3e 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -54,7 +54,7 @@ def resolve_manifest(source: ManifestDeclarativeSource) -> AirbyteMessage: } -def is_connector_builder_request(config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog): +def get_connector_builder_request_handler(config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog): stream_names = set([s.stream.name for s in configured_catalog.streams]) if any([s in CONNECTOR_BUILDER_STREAMS for s in stream_names]) or "__test_read_config" in config: if len(stream_names) != 1: diff --git a/airbyte-cdk/python/source_declarative_manifest/main.py b/airbyte-cdk/python/source_declarative_manifest/main.py index 738163d808dc..9e5f2078e694 100644 --- a/airbyte-cdk/python/source_declarative_manifest/main.py +++ b/airbyte-cdk/python/source_declarative_manifest/main.py @@ -37,7 +37,7 @@ def handle_request(args: List[str]): if command == "read": catalog_path = parser.catalog catalog = Source.read_catalog(catalog_path) - is_builder_request = connector_builder_handler.is_connector_builder_request(config, catalog) + is_builder_request = connector_builder_handler.get_connector_builder_request_handler(config, catalog) source = create_source(config, is_builder_request) if is_builder_request: print(is_builder_request(source)) diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py index c5a7556a620e..955224091392 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py @@ -5,7 +5,7 @@ import connector_builder.connector_builder_handler from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource -from connector_builder.connector_builder_handler import resolve_manifest, is_connector_builder_request +from connector_builder.connector_builder_handler import resolve_manifest, get_connector_builder_request_handler from unit_tests.connector_builder.utils import create_configured_catalog from functools import partial @@ -198,8 +198,8 @@ def resolved_manifest(self): partial(connector_builder.connector_builder_handler.read_stream, config=TEST_READ_CONFIG, configured_catalog=create_configured_catalog("my_stream"))), ]) -def test_is_connector_builder_request(test_name, config, configured_catalog, expected_result): - result = is_connector_builder_request(config, configured_catalog) +def test_get_connector_builder_request(test_name, config, configured_catalog, expected_result): + result = get_connector_builder_request_handler(config, configured_catalog) if isinstance(expected_result, partial): assert partial_functions_equal(expected_result, result) else: diff --git a/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py b/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py index a3cd55dd2301..4c52b6ec6da4 100644 --- a/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py +++ b/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py @@ -138,6 +138,6 @@ def test_given_injected_declarative_manifest_then_launch_with_declarative_manife def test_given_command_then_use_connector_builder_handler(config_file_with_command, catalog_file): - with mock.patch.object(connector_builder.connector_builder_handler, "is_connector_builder_request") as patch: + with mock.patch.object(connector_builder.connector_builder_handler, "get_connector_builder_request_handler") as patch: handle_request(["read", "--config", str(config_file_with_command), "--catalog", str(catalog_file)]) assert patch.call_count == 1 From bf0175d37ebdbde5e0f18bc0340a0d26e8cebaf9 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Fri, 10 Mar 2023 16:08:34 -0800 Subject: [PATCH 41/71] rename --- airbyte-cdk/python/connector_builder/message_grouper.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/message_grouper.py b/airbyte-cdk/python/connector_builder/message_grouper.py index 1f09d87a32d9..e8024706263e 100644 --- a/airbyte-cdk/python/connector_builder/message_grouper.py +++ b/airbyte-cdk/python/connector_builder/message_grouper.py @@ -23,7 +23,7 @@ class MessageGrouper: def __init__(self, max_pages_per_slice: int, max_slices: int, max_record_limit: int = 1000): self._max_pages_per_slice = max_pages_per_slice self._max_slices = max_slices - self.max_record_limit = max_record_limit + self._max_record_limit = max_record_limit def get_message_groups(self, source: DeclarativeSource, @@ -36,9 +36,9 @@ def get_message_groups(self, schema_inferrer = SchemaInferrer() if record_limit is None: - record_limit = self.max_record_limit + record_limit = self._max_record_limit else: - record_limit = min(record_limit, self.max_record_limit) + record_limit = min(record_limit, self._max_record_limit) slices = [] log_messages = [] From 1e6b3d2c2be1d92e714c960d4c042a365b372cd7 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Fri, 10 Mar 2023 16:21:00 -0800 Subject: [PATCH 42/71] rename --- airbyte-cdk/python/source_declarative_manifest/main.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/airbyte-cdk/python/source_declarative_manifest/main.py b/airbyte-cdk/python/source_declarative_manifest/main.py index 9e5f2078e694..fb532a61b487 100644 --- a/airbyte-cdk/python/source_declarative_manifest/main.py +++ b/airbyte-cdk/python/source_declarative_manifest/main.py @@ -37,10 +37,10 @@ def handle_request(args: List[str]): if command == "read": catalog_path = parser.catalog catalog = Source.read_catalog(catalog_path) - is_builder_request = connector_builder_handler.get_connector_builder_request_handler(config, catalog) - source = create_source(config, is_builder_request) - if is_builder_request: - print(is_builder_request(source)) + builder_request_handle = connector_builder_handler.get_connector_builder_request_handler(config, catalog) + source = create_source(config, builder_request_handle is not None) + if builder_request_handle: + print(builder_request_handle(source)) else: handle_connector_request(source, args) else: From 032c44c77d7e5aabc564726812b480ecbede1bc7 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Sun, 12 Mar 2023 21:38:40 -0700 Subject: [PATCH 43/71] format --- .../connector_builder/connector_builder_handler.py | 5 +++-- airbyte-cdk/python/connector_builder/message_grouper.py | 9 ++++----- airbyte-cdk/python/connector_builder/models.py | 3 +-- airbyte-cdk/python/source_declarative_manifest/main.py | 3 +-- .../connector_builder/test_connector_builder_handler.py | 7 ++++--- .../unit_tests/connector_builder/test_message_grouper.py | 7 +++---- airbyte-cdk/python/unit_tests/connector_builder/utils.py | 5 +++++ .../unit_tests/test_source_declarative_manifest.py | 5 ++--- 8 files changed, 23 insertions(+), 21 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index b4d6ad8f9b3e..4cbe4c321c1a 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -4,15 +4,16 @@ import dataclasses from datetime import datetime +from functools import partial from typing import Any, Mapping -from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage, Type, ConfiguredAirbyteCatalog +from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage, ConfiguredAirbyteCatalog +from airbyte_cdk.models import Type from airbyte_cdk.models import Type as MessageType from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from airbyte_cdk.utils.traced_exception import AirbyteTracedException from connector_builder.message_grouper import MessageGrouper -from functools import partial def list_streams() -> AirbyteMessage: diff --git a/airbyte-cdk/python/connector_builder/message_grouper.py b/airbyte-cdk/python/connector_builder/message_grouper.py index e8024706263e..50213ab792a3 100644 --- a/airbyte-cdk/python/connector_builder/message_grouper.py +++ b/airbyte-cdk/python/connector_builder/message_grouper.py @@ -3,18 +3,17 @@ # import json +import logging +from copy import deepcopy from json import JSONDecodeError from typing import Any, Iterable, Iterator, Mapping, Optional, Union from urllib.parse import parse_qs, urlparse -from airbyte_protocol.models.airbyte_protocol import ConfiguredAirbyteCatalog - from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Type from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource from airbyte_cdk.utils.schema_inferrer import SchemaInferrer -from connector_builder.models import StreamRead, StreamReadPages, HttpResponse, HttpRequest, StreamReadSlices -import logging -from copy import deepcopy +from airbyte_protocol.models.airbyte_protocol import ConfiguredAirbyteCatalog +from connector_builder.models import HttpRequest, HttpResponse, StreamRead, StreamReadPages, StreamReadSlices class MessageGrouper: diff --git a/airbyte-cdk/python/connector_builder/models.py b/airbyte-cdk/python/connector_builder/models.py index 9a31c7f8e9b2..732f5816c498 100644 --- a/airbyte-cdk/python/connector_builder/models.py +++ b/airbyte-cdk/python/connector_builder/models.py @@ -4,8 +4,7 @@ from dataclasses import dataclass from datetime import datetime -from typing import Any, Dict, Optional -from typing import List +from typing import Any, Dict, List, Optional @dataclass diff --git a/airbyte-cdk/python/source_declarative_manifest/main.py b/airbyte-cdk/python/source_declarative_manifest/main.py index fb532a61b487..0780c8cc610f 100644 --- a/airbyte-cdk/python/source_declarative_manifest/main.py +++ b/airbyte-cdk/python/source_declarative_manifest/main.py @@ -8,8 +8,7 @@ from airbyte_cdk.connector import BaseConnector from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch -from airbyte_cdk.models import AirbyteMessage -from airbyte_cdk.models import ConfiguredAirbyteCatalog +from airbyte_cdk.models import AirbyteMessage, ConfiguredAirbyteCatalog from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from airbyte_cdk.sources.source import Source from connector_builder import connector_builder_handler diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py index 955224091392..125e493d13e0 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py @@ -1,13 +1,14 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -import pytest + +from functools import partial import connector_builder.connector_builder_handler +import pytest from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource -from connector_builder.connector_builder_handler import resolve_manifest, get_connector_builder_request_handler +from connector_builder.connector_builder_handler import get_connector_builder_request_handler, resolve_manifest from unit_tests.connector_builder.utils import create_configured_catalog -from functools import partial _stream_name = "stream_with_custom_requester" _stream_primary_key = "id" diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_message_grouper.py b/airbyte-cdk/python/unit_tests/connector_builder/test_message_grouper.py index db2cf2dcf04f..b12cd281cf62 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_message_grouper.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_message_grouper.py @@ -1,17 +1,16 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # + import json from typing import Iterator from unittest.mock import MagicMock import pytest - -from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage -from airbyte_cdk.models import Level, AirbyteLogMessage +from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, AirbyteRecordMessage, Level from airbyte_cdk.models import Type as MessageType from connector_builder.message_grouper import MessageGrouper -from connector_builder.models import StreamReadPages, HttpRequest, HttpResponse, StreamRead +from connector_builder.models import HttpRequest, HttpResponse, StreamRead, StreamReadPages from unit_tests.connector_builder.utils import create_configured_catalog MAX_PAGES_PER_SLICE = 4 diff --git a/airbyte-cdk/python/unit_tests/connector_builder/utils.py b/airbyte-cdk/python/unit_tests/connector_builder/utils.py index 9566d203901f..15abdd30b9d9 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/utils.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/utils.py @@ -1,4 +1,9 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + from typing import Any, Mapping + from airbyte_cdk.models.airbyte_protocol import ConfiguredAirbyteCatalog diff --git a/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py b/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py index 4c52b6ec6da4..6fdedd2786c2 100644 --- a/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py +++ b/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py @@ -4,13 +4,12 @@ import copy import json -from typing import Mapping, Any +from typing import Any, Mapping from unittest import mock +import connector_builder import pytest - import source_declarative_manifest -import connector_builder from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from source_declarative_manifest.main import handle_request from unit_tests.connector_builder.utils import create_configured_catalog_dict From aea625ee459d5e256c121cfbf746faa97ed18d0c Mon Sep 17 00:00:00 2001 From: Catherine Noll Date: Mon, 13 Mar 2023 20:40:33 +0000 Subject: [PATCH 44/71] Give connector_builder its own main.py --- airbyte-cdk/python/connector_builder/main.py | 67 +++++++++++ .../source_declarative_manifest/main.py | 54 ++------- .../test_connector_builder_handler.py | 20 ++++ .../test_source_declarative_manifest.py | 104 ++---------------- 4 files changed, 107 insertions(+), 138 deletions(-) create mode 100644 airbyte-cdk/python/connector_builder/main.py diff --git a/airbyte-cdk/python/connector_builder/main.py b/airbyte-cdk/python/connector_builder/main.py new file mode 100644 index 000000000000..0bff4b6781e8 --- /dev/null +++ b/airbyte-cdk/python/connector_builder/main.py @@ -0,0 +1,67 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import argparse +import sys +from typing import Any, List, Mapping, Tuple + +from airbyte_cdk.connector import BaseConnector +from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch +from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource +from connector_builder.connector_builder_handler import resolve_manifest + + +def create_source(config: Mapping[str, Any]) -> ManifestDeclarativeSource: + manifest = config.get("__injected_declarative_manifest") + return ManifestDeclarativeSource(manifest) + + +def get_config_from_args(args: List[str]) -> Mapping[str, Any]: + command, config_filepath = preparse(args) + if command == "spec": + raise ValueError("spec command is not supported for injected declarative manifest") + + config = BaseConnector.read_config(config_filepath) + + if "__injected_declarative_manifest" not in config: + raise ValueError( + f"Invalid config: `__injected_declarative_manifest` should be provided at the root of the config but config only has keys {list(config.keys())}" + ) + + return config + + +def preparse(args: List[str]) -> Tuple[str, str]: + parser = argparse.ArgumentParser() + parser.add_argument("command", type=str, help="Airbyte Protocol command") + parser.add_argument("--config", type=str, required=True, help="path to the json configuration file") + parsed, _ = parser.parse_known_args(args) + return parsed.command, parsed.config + + +def handle_connector_builder_request(source: ManifestDeclarativeSource, config: Mapping[str, Any]): + command = config.get("__command") + if command == "resolve_manifest": + return resolve_manifest(source) + raise ValueError(f"Unrecognized command {command}.") + + +def handle_connector_request(source: ManifestDeclarativeSource, args: List[str]): + # Verify that the correct args are present for the production codepaths. + AirbyteEntrypoint.parse_args(args) + launch(source, sys.argv[1:]) + + +def handle_request(args: List[str]): + config = get_config_from_args(args) + source = create_source(config) + if "__command" in config: + print(handle_connector_builder_request(source, config)) + else: + handle_connector_request(source, args) + + +if __name__ == "__main__": + handle_request(sys.argv[1:]) diff --git a/airbyte-cdk/python/source_declarative_manifest/main.py b/airbyte-cdk/python/source_declarative_manifest/main.py index 369743eda454..2c1bdcb2b782 100644 --- a/airbyte-cdk/python/source_declarative_manifest/main.py +++ b/airbyte-cdk/python/source_declarative_manifest/main.py @@ -3,65 +3,27 @@ # -import argparse import sys -from typing import Any, List, Mapping, Tuple +from typing import List from airbyte_cdk.connector import BaseConnector from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource -from connector_builder import connector_builder_handler -def create_source(config: Mapping[str, Any]) -> ManifestDeclarativeSource: - manifest = config.get("__injected_declarative_manifest") - return ManifestDeclarativeSource(manifest) - - -def get_config_from_args(args: List[str]) -> Mapping[str, Any]: - command, config_filepath = preparse(args) - if command == "spec": +def create_manifest(args: List[str]): + parsed_args = AirbyteEntrypoint.parse_args(args) + if parsed_args.command == "spec": raise ValueError("spec command is not supported for injected declarative manifest") - config = BaseConnector.read_config(config_filepath) - + config = BaseConnector.read_config(parsed_args.config) if "__injected_declarative_manifest" not in config: raise ValueError( f"Invalid config: `__injected_declarative_manifest` should be provided at the root of the config but config only has keys {list(config.keys())}" ) - - return config - - -def preparse(args: List[str]) -> Tuple[str, str]: - parser = argparse.ArgumentParser() - parser.add_argument("command", type=str, help="Airbyte Protocol command") - parser.add_argument("--config", type=str, required=True, help="path to the json configuration file") - parsed, _ = parser.parse_known_args(args) - return parsed.command, parsed.config - - -def handle_connector_builder_request(source: ManifestDeclarativeSource, config: Mapping[str, Any]): - command = config.get("__command") - if command == "resolve_manifest": - return connector_builder_handler.resolve_manifest(source) - raise ValueError(f"Unrecognized command {command}.") - - -def handle_connector_request(source: ManifestDeclarativeSource, args: List[str]): - # Verify that the correct args are present for the production codepaths. - AirbyteEntrypoint.parse_args(args) - launch(source, sys.argv[1:]) - - -def handle_request(args: List[str]): - config = get_config_from_args(args) - source = create_source(config) - if "__command" in config: - print(handle_connector_builder_request(source, config)) - else: - handle_connector_request(source, args) + return ManifestDeclarativeSource(config.get("__injected_declarative_manifest")) if __name__ == "__main__": - handle_request(sys.argv[1:]) + source = create_manifest(sys.argv[1:]) + launch(source, sys.argv[1:]) diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py index a0ca267dd45f..551aa7eac51a 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py @@ -2,8 +2,12 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +import copy + +import pytest from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from connector_builder.connector_builder_handler import resolve_manifest +from connector_builder.main import handle_connector_builder_request _stream_name = "stream_with_custom_requester" _stream_primary_key = "id" @@ -177,3 +181,19 @@ def resolved_manifest(self): source = MockManifestDeclarativeSource() response = resolve_manifest(source) assert "Error resolving manifest" in response.trace.error.message + + +@pytest.mark.parametrize( + "command", + [ + pytest.param("asdf", id="test_arbitrary_command_error"), + pytest.param(None, id="test_command_is_none_error"), + pytest.param("", id="test_command_is_empty_error"), + ], +) +def test_invalid_command(command): + config = copy.deepcopy(CONFIG) + config["__command"] = command + source = ManifestDeclarativeSource(CONFIG["__injected_declarative_manifest"]) + with pytest.raises(ValueError): + handle_connector_builder_request(source, config) diff --git a/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py b/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py index 7b6d75f42f28..aa762395f4dc 100644 --- a/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py +++ b/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py @@ -4,12 +4,10 @@ import copy import json -from unittest import mock import pytest -import source_declarative_manifest from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource -from source_declarative_manifest.main import handle_connector_builder_request, handle_request +from source_declarative_manifest.main import create_manifest CONFIG = { "__injected_declarative_manifest": { @@ -43,112 +41,34 @@ } } -CATALOG = {} - @pytest.fixture def valid_config_file(tmp_path): - return _write_to_tmp_path(tmp_path, CONFIG, "config") - - -@pytest.fixture -def catalog_file(tmp_path): - return _write_to_tmp_path(tmp_path, CATALOG, "catalog") + config_file = tmp_path / "config.json" + config_file.write_text(json.dumps(CONFIG)) + return config_file @pytest.fixture def config_file_without_injection(tmp_path): config = copy.deepcopy(CONFIG) del config["__injected_declarative_manifest"] - return _write_to_tmp_path(tmp_path, config, "config") - - -@pytest.fixture -def config_file_with_command(tmp_path): - config = copy.deepcopy(CONFIG) - config["__command"] = "command" - return _write_to_tmp_path(tmp_path, config, "config") - -def _write_to_tmp_path(tmp_path, config, filename): - config_file = tmp_path / f"{filename}.json" + config_file = tmp_path / "config.json" config_file.write_text(json.dumps(config)) return config_file -def test_on_spec_command_then_raise_value_error(valid_config_file): +def test_on_spec_command_then_raise_value_error(): with pytest.raises(ValueError): - handle_request(["spec", "--config", str(valid_config_file)]) + create_manifest(["spec"]) -@pytest.mark.parametrize( - "command", - [ - pytest.param("check", id="test_check_command_error"), - pytest.param("discover", id="test_discover_command_error"), - pytest.param("read", id="test_read_command_error"), - pytest.param("asdf", id="test_arbitrary_command_error"), - ], -) -def test_given_no_injected_declarative_manifest_then_raise_value_error(command, config_file_without_injection): +def test_given_no_injected_declarative_manifest_then_raise_value_error(config_file_without_injection): with pytest.raises(ValueError): - handle_request([command, "--config", str(config_file_without_injection)]) + create_manifest(["check", "--config", str(config_file_without_injection)]) -@pytest.mark.parametrize( - "command", - [ - pytest.param("check", id="test_check_command_error"), - pytest.param("discover", id="test_discover_command_error"), - pytest.param("read", id="test_read_command_error"), - pytest.param("asdf", id="test_arbitrary_command_error"), - ], -) -def test_missing_config_raises_value_error(command): - with pytest.raises(SystemExit): - handle_request([command]) - - -@pytest.mark.parametrize( - "command", - [ - pytest.param("check", id="test_check_command"), - pytest.param("discover", id="test_discover_command"), - pytest.param("read", id="test_read_command"), - ], -) -def test_given_injected_declarative_manifest_then_launch_with_declarative_manifest(command, valid_config_file, catalog_file): - with mock.patch("source_declarative_manifest.main.launch") as patch: - if command == "read": - handle_request([command, "--config", str(valid_config_file), "--catalog", str(catalog_file)]) - else: - handle_request([command, "--config", str(valid_config_file)]) - source, _ = patch.call_args[0] - assert isinstance(source, ManifestDeclarativeSource) - - -def test_given_injected_declarative_manifest_then_launch_with_declarative_manifest_missing_arg(valid_config_file): - with pytest.raises(SystemExit): - handle_request(["read", "--config", str(valid_config_file)]) - - -def test_given_command_then_use_connector_builder_handler(config_file_with_command): - with mock.patch.object(source_declarative_manifest.main, "handle_connector_builder_request") as patch: - handle_request(["read", "--config", str(config_file_with_command)]) - assert patch.call_count == 1 - - -@pytest.mark.parametrize( - "command", - [ - pytest.param("asdf", id="test_arbitrary_command_error"), - pytest.param(None, id="test_command_is_none_error"), - pytest.param("", id="test_command_is_empty_error"), - ], -) -def test_invalid_command(command): - config = copy.deepcopy(CONFIG) - config["__command"] = command - source = ManifestDeclarativeSource(CONFIG["__injected_declarative_manifest"]) - with pytest.raises(ValueError): - handle_connector_builder_request(source, config) +def test_given_injected_declarative_manifest_then_return_declarative_manifest(valid_config_file): + source = create_manifest(["check", "--config", str(valid_config_file)]) + assert isinstance(source, ManifestDeclarativeSource) From 0b15013cad32b060e8b5992da20b10a40924a4b7 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Mon, 13 Mar 2023 15:09:15 -0700 Subject: [PATCH 45/71] Update --- .../connector_builder_handler.py | 3 +- airbyte-cdk/python/connector_builder/main.py | 29 ++++++++++++------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index 4cbe4c321c1a..b9cbd7697f47 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -21,7 +21,8 @@ def list_streams() -> AirbyteMessage: def read_stream(source: DeclarativeSource, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog) -> AirbyteMessage: - command_config = config.get("__test_read_config") + print(f"config: {config.keys()}") + command_config = config["__test_read_config"] max_pages_per_slice = command_config["max_pages_per_slice"] max_slices = command_config["max_slices"] max_records = command_config["max_records"] diff --git a/airbyte-cdk/python/connector_builder/main.py b/airbyte-cdk/python/connector_builder/main.py index 526067b09d44..9096d42f22f4 100644 --- a/airbyte-cdk/python/connector_builder/main.py +++ b/airbyte-cdk/python/connector_builder/main.py @@ -10,12 +10,13 @@ from airbyte_cdk.connector import BaseConnector from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource -from connector_builder.connector_builder_handler import resolve_manifest +from connector_builder.connector_builder_handler import resolve_manifest, read_stream +from airbyte_cdk.models import ConfiguredAirbyteCatalog -def create_source(config: Mapping[str, Any]) -> ManifestDeclarativeSource: +def create_source(config: Mapping[str, Any], debug: bool) -> ManifestDeclarativeSource: manifest = config.get("__injected_declarative_manifest") - return ManifestDeclarativeSource(manifest) + return ManifestDeclarativeSource(manifest, debug) def get_config_from_args(args: List[str]) -> Mapping[str, Any]: @@ -41,13 +42,15 @@ def preparse(args: List[str]) -> Tuple[str, str]: return parsed.command, parsed.config -def handle_connector_builder_request(source: ManifestDeclarativeSource, config: Mapping[str, Any]): - command = config.get("__command") +def handle_connector_builder_request(source: ManifestDeclarativeSource, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog): + command = config["__command"] if command == "resolve_manifest": - return resolve_manifest(source) + result = resolve_manifest(source) elif command == "read": - raise NotImplementedError - raise ValueError(f"Unrecognized command {command}.") + result = read_stream(source, config, catalog) + else: + raise ValueError(f"Unrecognized command {command}.") + print(result) def handle_connector_request(source: ManifestDeclarativeSource, args: List[str]): @@ -57,11 +60,15 @@ def handle_connector_request(source: ManifestDeclarativeSource, args: List[str]) def handle_request(args: List[str]): - config = get_config_from_args(args) - source = create_source(config) + parser = AirbyteEntrypoint.parse_args(args) + config_path, catalog_path = parser.config, parser.catalog + config = BaseConnector.read_config(config_path) + catalog = ConfiguredAirbyteCatalog.parse_obj(BaseConnector.read_config(catalog_path)) if "__command" in config: - print(handle_connector_builder_request(source, config)) + source = create_source(config, True) + handle_connector_builder_request(source, config, catalog) else: + source = create_source(config, False) handle_connector_request(source, args) From 5ad0fba31e6df031677d584719eea5c694b31799 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Mon, 13 Mar 2023 15:11:01 -0700 Subject: [PATCH 46/71] reset --- airbyte-cdk/python/source_declarative_manifest/main.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/airbyte-cdk/python/source_declarative_manifest/main.py b/airbyte-cdk/python/source_declarative_manifest/main.py index 51c1d2543816..2c1bdcb2b782 100644 --- a/airbyte-cdk/python/source_declarative_manifest/main.py +++ b/airbyte-cdk/python/source_declarative_manifest/main.py @@ -4,12 +4,10 @@ import sys -from typing import Any, List, Mapping from typing import List from airbyte_cdk.connector import BaseConnector from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch -from airbyte_cdk.models import AirbyteMessage, ConfiguredAirbyteCatalog from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource From f78232822f242308a2328692a6845bef87ef7225 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Mon, 13 Mar 2023 15:13:05 -0700 Subject: [PATCH 47/71] delete dead code --- airbyte-cdk/python/connector_builder/main.py | 23 -------------------- 1 file changed, 23 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/main.py b/airbyte-cdk/python/connector_builder/main.py index 9096d42f22f4..fe82af25f8c3 100644 --- a/airbyte-cdk/python/connector_builder/main.py +++ b/airbyte-cdk/python/connector_builder/main.py @@ -19,29 +19,6 @@ def create_source(config: Mapping[str, Any], debug: bool) -> ManifestDeclarative return ManifestDeclarativeSource(manifest, debug) -def get_config_from_args(args: List[str]) -> Mapping[str, Any]: - command, config_filepath = preparse(args) - if command == "spec": - raise ValueError("spec command is not supported for injected declarative manifest") - - config = BaseConnector.read_config(config_filepath) - - if "__injected_declarative_manifest" not in config: - raise ValueError( - f"Invalid config: `__injected_declarative_manifest` should be provided at the root of the config but config only has keys {list(config.keys())}" - ) - - return config - - -def preparse(args: List[str]) -> Tuple[str, str]: - parser = argparse.ArgumentParser() - parser.add_argument("command", type=str, help="Airbyte Protocol command") - parser.add_argument("--config", type=str, required=True, help="path to the json configuration file") - parsed, _ = parser.parse_known_args(args) - return parsed.command, parsed.config - - def handle_connector_builder_request(source: ManifestDeclarativeSource, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog): command = config["__command"] if command == "resolve_manifest": From 1e9c159776e86fdc832b343cb16837534be1763d Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Mon, 13 Mar 2023 15:13:40 -0700 Subject: [PATCH 48/71] remove debug print --- .../python/connector_builder/connector_builder_handler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index b9cbd7697f47..baf40c8f9401 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -21,7 +21,6 @@ def list_streams() -> AirbyteMessage: def read_stream(source: DeclarativeSource, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog) -> AirbyteMessage: - print(f"config: {config.keys()}") command_config = config["__test_read_config"] max_pages_per_slice = command_config["max_pages_per_slice"] max_slices = command_config["max_slices"] From 691e957cd7e67bff00ef252899910b71610e27e6 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Mon, 13 Mar 2023 15:16:34 -0700 Subject: [PATCH 49/71] update test --- .../connector_builder_handler.py | 21 ----------- .../test_connector_builder_handler.py | 35 +++---------------- 2 files changed, 4 insertions(+), 52 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index baf40c8f9401..472eda68b0fb 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -49,26 +49,5 @@ def resolve_manifest(source: ManifestDeclarativeSource) -> AirbyteMessage: return error.as_airbyte_message() -CONNECTOR_BUILDER_STREAMS = { - "resolve_manifest", - "list_streams" -} - - -def get_connector_builder_request_handler(config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog): - stream_names = set([s.stream.name for s in configured_catalog.streams]) - if any([s in CONNECTOR_BUILDER_STREAMS for s in stream_names]) or "__test_read_config" in config: - if len(stream_names) != 1: - raise ValueError(f"Only reading from a single stream is supported. Got: {stream_names}") - command = next(iter(stream_names)) # Connector builder only supports reading from a single stream - if command == "resolve_manifest": - return resolve_manifest - elif command == "list_streams": - return list_streams - else: - return partial(read_stream, config=config, configured_catalog=configured_catalog) - return None - - def _emitted_at(): return int(datetime.now().timestamp()) * 1000 diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py index b1556729ac71..b245240c599f 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py @@ -2,19 +2,14 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -from functools import partial - -import connector_builder.connector_builder_handler -import pytest -from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource -from connector_builder.connector_builder_handler import get_connector_builder_request_handler, resolve_manifest -from unit_tests.connector_builder.utils import create_configured_catalog import copy import pytest + from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from connector_builder.connector_builder_handler import resolve_manifest from connector_builder.main import handle_connector_builder_request +from unit_tests.connector_builder.utils import create_configured_catalog _stream_name = "stream_with_custom_requester" _stream_primary_key = "id" @@ -195,29 +190,6 @@ def resolved_manifest(self): assert "Error resolving manifest" in response.trace.error.message -@pytest.mark.parametrize("test_name, config, configured_catalog, expected_result", - [ - ("test_resolve_manifest_is_connector_builder_request", CONFIG, create_configured_catalog("resolve_manifest"), connector_builder.connector_builder_handler.resolve_manifest), - ("test_list_streams_is_connector_builder_request", CONFIG, create_configured_catalog("list_streams"), connector_builder.connector_builder_handler.list_streams), - ("test_regular_stream_is_not_connector_builder_request", CONFIG, create_configured_catalog("my_stream"), None), - ("test_regular_stream_with_test_read_config_is_connector_builder_request", TEST_READ_CONFIG, - create_configured_catalog("my_stream"), - partial(connector_builder.connector_builder_handler.read_stream, config=TEST_READ_CONFIG, - configured_catalog=create_configured_catalog("my_stream"))), - ]) -def test_get_connector_builder_request(test_name, config, configured_catalog, expected_result): - result = get_connector_builder_request_handler(config, configured_catalog) - if isinstance(expected_result, partial): - assert partial_functions_equal(expected_result, result) - else: - assert result == expected_result - - -def partial_functions_equal(func1, func2): - if not (isinstance(func1, partial) and isinstance(func2, partial)): - return False - are_equal = all([getattr(func1, attr) == getattr(func2, attr) for attr in ['func', 'args', 'keywords']]) - return are_equal @pytest.mark.parametrize( "command", [ @@ -231,4 +203,5 @@ def test_invalid_command(command): config["__command"] = command source = ManifestDeclarativeSource(CONFIG["__injected_declarative_manifest"]) with pytest.raises(ValueError): - handle_connector_builder_request(source, config) + handle_connector_builder_request(source, config, create_configured_catalog("my_stream")) + From 2280924c0a40de4c1b0805a56eabe1532071b39a Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Mon, 13 Mar 2023 15:37:00 -0700 Subject: [PATCH 50/71] Update --- .../connector_builder/connector_builder_handler.py | 3 ++- airbyte-cdk/python/connector_builder/main.py | 2 +- .../test_connector_builder_handler.py | 14 ++++---------- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index 472eda68b0fb..0cadad208a60 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -26,10 +26,11 @@ def read_stream(source: DeclarativeSource, config: Mapping[str, Any], configured max_slices = command_config["max_slices"] max_records = command_config["max_records"] handler = MessageGrouper(max_pages_per_slice, max_slices) + stream_name = configured_catalog.streams[0].stream # The connector builder only supports a single stream stream_read = handler.get_message_groups(source, config, configured_catalog, max_records) return AirbyteMessage(type=MessageType.RECORD, record=AirbyteRecordMessage( data=dataclasses.asdict(stream_read), - stream="_test_read", + stream="_test_read", #FIXME emitted_at=_emitted_at() )) diff --git a/airbyte-cdk/python/connector_builder/main.py b/airbyte-cdk/python/connector_builder/main.py index fe82af25f8c3..e7fbc64aa5f4 100644 --- a/airbyte-cdk/python/connector_builder/main.py +++ b/airbyte-cdk/python/connector_builder/main.py @@ -22,7 +22,7 @@ def create_source(config: Mapping[str, Any], debug: bool) -> ManifestDeclarative def handle_connector_builder_request(source: ManifestDeclarativeSource, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog): command = config["__command"] if command == "resolve_manifest": - result = resolve_manifest(source) + result = resolve_manifest(source) elif command == "read": result = read_stream(source, config, catalog) else: diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py index b245240c599f..734b9fb5ea95 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py @@ -3,13 +3,16 @@ # import copy +from unittest.mock import patch import pytest from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from connector_builder.connector_builder_handler import resolve_manifest -from connector_builder.main import handle_connector_builder_request +from connector_builder.main import handle_connector_builder_request, read_stream +from connector_builder.models import StreamRead from unit_tests.connector_builder.utils import create_configured_catalog +from airbyte_cdk.models import ConfiguredAirbyteCatalog, AirbyteMessage _stream_name = "stream_with_custom_requester" _stream_primary_key = "id" @@ -51,14 +54,6 @@ "__injected_declarative_manifest": MANIFEST, } -TEST_READ_CONFIG = { - "__injected_declarative_manifest": MANIFEST, - "__test_read_config": { - "max_records": 10 - } -} - - def test_resolve_manifest(): source = ManifestDeclarativeSource(MANIFEST) resolved_manifest = resolve_manifest(source) @@ -204,4 +199,3 @@ def test_invalid_command(command): source = ManifestDeclarativeSource(CONFIG["__injected_declarative_manifest"]) with pytest.raises(ValueError): handle_connector_builder_request(source, config, create_configured_catalog("my_stream")) - From 754c61cfaf5636056b1451899182e5a0f3ac15ca Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Mon, 13 Mar 2023 15:39:39 -0700 Subject: [PATCH 51/71] set right stream --- .../python/connector_builder/connector_builder_handler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index 0cadad208a60..e20888ae64c9 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -26,11 +26,11 @@ def read_stream(source: DeclarativeSource, config: Mapping[str, Any], configured max_slices = command_config["max_slices"] max_records = command_config["max_records"] handler = MessageGrouper(max_pages_per_slice, max_slices) - stream_name = configured_catalog.streams[0].stream # The connector builder only supports a single stream + stream_name = configured_catalog.streams[0].stream # The connector builder only supports a single stream stream_read = handler.get_message_groups(source, config, configured_catalog, max_records) return AirbyteMessage(type=MessageType.RECORD, record=AirbyteRecordMessage( data=dataclasses.asdict(stream_read), - stream="_test_read", #FIXME + stream=stream_name, emitted_at=_emitted_at() )) From d38a76089c41b2397ff426134f74e99b2b06e527 Mon Sep 17 00:00:00 2001 From: Catherine Noll Date: Mon, 13 Mar 2023 23:35:13 +0000 Subject: [PATCH 52/71] Add --catalog argument --- airbyte-cdk/python/connector_builder/main.py | 14 ++-- .../test_connector_builder_handler.py | 64 ++++++++++++++++--- 2 files changed, 60 insertions(+), 18 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/main.py b/airbyte-cdk/python/connector_builder/main.py index 0bff4b6781e8..aeeaf759df77 100644 --- a/airbyte-cdk/python/connector_builder/main.py +++ b/airbyte-cdk/python/connector_builder/main.py @@ -8,7 +8,6 @@ from typing import Any, List, Mapping, Tuple from airbyte_cdk.connector import BaseConnector -from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from connector_builder.connector_builder_handler import resolve_manifest @@ -20,8 +19,8 @@ def create_source(config: Mapping[str, Any]) -> ManifestDeclarativeSource: def get_config_from_args(args: List[str]) -> Mapping[str, Any]: command, config_filepath = preparse(args) - if command == "spec": - raise ValueError("spec command is not supported for injected declarative manifest") + if command != "read": + raise ValueError("Only read commands are allowed for Connector Builder requests.") config = BaseConnector.read_config(config_filepath) @@ -37,6 +36,7 @@ def preparse(args: List[str]) -> Tuple[str, str]: parser = argparse.ArgumentParser() parser.add_argument("command", type=str, help="Airbyte Protocol command") parser.add_argument("--config", type=str, required=True, help="path to the json configuration file") + parser.add_argument("--catalog", type=str, required=True, help="path to the catalog file, if it exists (otherwise empty string)") parsed, _ = parser.parse_known_args(args) return parsed.command, parsed.config @@ -48,19 +48,13 @@ def handle_connector_builder_request(source: ManifestDeclarativeSource, config: raise ValueError(f"Unrecognized command {command}.") -def handle_connector_request(source: ManifestDeclarativeSource, args: List[str]): - # Verify that the correct args are present for the production codepaths. - AirbyteEntrypoint.parse_args(args) - launch(source, sys.argv[1:]) - - def handle_request(args: List[str]): config = get_config_from_args(args) source = create_source(config) if "__command" in config: print(handle_connector_builder_request(source, config)) else: - handle_connector_request(source, args) + raise ValueError("Missing __command argument in config file.") if __name__ == "__main__": diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py index 551aa7eac51a..dfb2b4ec7c3c 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py @@ -3,11 +3,14 @@ # import copy +import json +from unittest import mock +import connector_builder import pytest from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from connector_builder.connector_builder_handler import resolve_manifest -from connector_builder.main import handle_connector_builder_request +from connector_builder.main import handle_connector_builder_request, handle_request _stream_name = "stream_with_custom_requester" _stream_primary_key = "id" @@ -52,9 +55,33 @@ } -def test_resolve_manifest(): +@pytest.fixture +def valid_config_file(tmp_path): + config_file = tmp_path / "config.json" + config_file.write_text(json.dumps(CONFIG)) + return config_file + + +@pytest.fixture +def invalid_config_file(tmp_path): + invalid_config = copy.deepcopy(CONFIG) + invalid_config["__command"] = "bad_command" + config_file = tmp_path / "config.json" + config_file.write_text(json.dumps(invalid_config)) + return config_file + + +def test_handle_resolve_manifest(valid_config_file): + with mock.patch.object(connector_builder.main, "handle_connector_builder_request") as patch: + handle_request(["read", "--config", str(valid_config_file), "--catalog", ""]) + assert patch.call_count == 1 + + +def test_resolve_manifest(valid_config_file): + config = copy.deepcopy(CONFIG) + config["__command"] = "resolve_manifest" source = ManifestDeclarativeSource(MANIFEST) - resolved_manifest = resolve_manifest(source) + resolved_manifest = handle_connector_builder_request(source, config) expected_resolved_manifest = { "type": "DeclarativeSource", @@ -186,14 +213,35 @@ def resolved_manifest(self): @pytest.mark.parametrize( "command", [ - pytest.param("asdf", id="test_arbitrary_command_error"), + pytest.param("check", id="test_check_command_error"), + pytest.param("spec", id="test_spec_command_error"), + pytest.param("discover", id="test_discover_command_error"), pytest.param(None, id="test_command_is_none_error"), pytest.param("", id="test_command_is_empty_error"), ], ) -def test_invalid_command(command): +def test_invalid_protocol_command(command, valid_config_file): config = copy.deepcopy(CONFIG) - config["__command"] = command - source = ManifestDeclarativeSource(CONFIG["__injected_declarative_manifest"]) + config["__command"] = "list_streams" + with pytest.raises(ValueError): + handle_request([command, "--config", str(valid_config_file), "--catalog", ""]) + + +def test_missing_command(valid_config_file): + with pytest.raises(SystemExit): + handle_request(["--config", str(valid_config_file), "--catalog", ""]) + + +def test_missing_catalog(valid_config_file): + with pytest.raises(SystemExit): + handle_request(["read", "--config", str(valid_config_file)]) + + +def test_missing_config(valid_config_file): + with pytest.raises(SystemExit): + handle_request(["read", "--catalog", str(valid_config_file)]) + + +def test_invalid_config_command(invalid_config_file): with pytest.raises(ValueError): - handle_connector_builder_request(source, config) + handle_request(["read", "--config", str(invalid_config_file), "--catalog", ""]) From d64cf5dba8ef6736c8cc684f38efdb571ae1823e Mon Sep 17 00:00:00 2001 From: Catherine Noll Date: Tue, 14 Mar 2023 11:14:29 +0000 Subject: [PATCH 53/71] Remove unneeded preparse --- airbyte-cdk/python/connector_builder/main.py | 19 +++++-------------- .../test_connector_builder_handler.py | 2 +- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/main.py b/airbyte-cdk/python/connector_builder/main.py index aeeaf759df77..15ac5dcb8aa9 100644 --- a/airbyte-cdk/python/connector_builder/main.py +++ b/airbyte-cdk/python/connector_builder/main.py @@ -3,11 +3,11 @@ # -import argparse import sys -from typing import Any, List, Mapping, Tuple +from typing import Any, List, Mapping from airbyte_cdk.connector import BaseConnector +from airbyte_cdk.entrypoint import AirbyteEntrypoint from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from connector_builder.connector_builder_handler import resolve_manifest @@ -18,11 +18,11 @@ def create_source(config: Mapping[str, Any]) -> ManifestDeclarativeSource: def get_config_from_args(args: List[str]) -> Mapping[str, Any]: - command, config_filepath = preparse(args) - if command != "read": + parsed_args = AirbyteEntrypoint.parse_args(args) + if parsed_args.command != "read": raise ValueError("Only read commands are allowed for Connector Builder requests.") - config = BaseConnector.read_config(config_filepath) + config = BaseConnector.read_config(parsed_args.config) if "__injected_declarative_manifest" not in config: raise ValueError( @@ -32,15 +32,6 @@ def get_config_from_args(args: List[str]) -> Mapping[str, Any]: return config -def preparse(args: List[str]) -> Tuple[str, str]: - parser = argparse.ArgumentParser() - parser.add_argument("command", type=str, help="Airbyte Protocol command") - parser.add_argument("--config", type=str, required=True, help="path to the json configuration file") - parser.add_argument("--catalog", type=str, required=True, help="path to the catalog file, if it exists (otherwise empty string)") - parsed, _ = parser.parse_known_args(args) - return parsed.command, parsed.config - - def handle_connector_builder_request(source: ManifestDeclarativeSource, config: Mapping[str, Any]): command = config.get("__command") if command == "resolve_manifest": diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py index dfb2b4ec7c3c..a2f2c81d89fa 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py @@ -223,7 +223,7 @@ def resolved_manifest(self): def test_invalid_protocol_command(command, valid_config_file): config = copy.deepcopy(CONFIG) config["__command"] = "list_streams" - with pytest.raises(ValueError): + with pytest.raises(SystemExit): handle_request([command, "--config", str(valid_config_file), "--catalog", ""]) From 873263980d4cb8abe2cbc6345eda9fe226ab45c7 Mon Sep 17 00:00:00 2001 From: Catherine Noll Date: Tue, 14 Mar 2023 11:30:30 +0000 Subject: [PATCH 54/71] Update README --- airbyte-cdk/python/connector_builder/README.md | 15 +++++++++++++-- .../python/source_declarative_manifest/README.md | 2 -- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/README.md b/airbyte-cdk/python/connector_builder/README.md index ac2db315bc3e..f56a422b0e8a 100644 --- a/airbyte-cdk/python/connector_builder/README.md +++ b/airbyte-cdk/python/connector_builder/README.md @@ -7,10 +7,21 @@ This is the backend for requests from the [Connector Builder](https://docs.airby ### Locally running the Connector Builder backend ``` -python main.py read --config secrets/config.json +python main.py read --config path/to/config --catalog path/to/catalog ``` -Note: Requires the keys `__injected_declarative_manifest` and `__command` in its config, where `__injected_declarative_manifest` is a JSON manifest and `__command` is one of the commands handled by the ConnectorBuilderHandler (`stream_read`, `list_streams`, or `resolve_manifest`). +Note: +- Requires the keys `__injected_declarative_manifest` and `__command` in its config, where `__injected_declarative_manifest` is a JSON manifest and `__command` is one of the commands handled by the ConnectorBuilderHandler (`stream_read`, `list_streams`, or `resolve_manifest`), i.e. +``` +{ + "config": , + "__injected_declarative_manifest": {...}, + "__command": <"resolve_manifest" | "list_streams" | "stream_read"> +} +``` +*See [ConnectionSpecification](https://docs.airbyte.com/understanding-airbyte/airbyte-protocol/#actor-specification) for details on the `"config"` key if needed. + +- When the `__command` is `list_streams` or `resolve_manifest`, the argument to `catalog` should be an empty string. ### Locally running the docker image diff --git a/airbyte-cdk/python/source_declarative_manifest/README.md b/airbyte-cdk/python/source_declarative_manifest/README.md index 0ed9a0443476..7a723a4b6d33 100644 --- a/airbyte-cdk/python/source_declarative_manifest/README.md +++ b/airbyte-cdk/python/source_declarative_manifest/README.md @@ -7,8 +7,6 @@ This entrypoint is used for connectors created by the connector builder. These c The spec operation is not supported because the config is not known when running a spec. -This entrypoint is also the entrypoint for requests from the [Connector Builder](https://docs.airbyte.com/connector-development/config-based/connector-builder-ui/) Server. In addition to the `__injected_declarative_manifest`, the [Connector Builder backend](https://github.com/airbytehq/airbyte/blob/master/airbyte-cdk/python/connector_builder/README.md) config requires the `__command` key, whose value is one of the commands handled by the ConnectorBuilderHandler (`stream_read`, `list_streams`, or `resolve_manifest`). - ## Local development #### Building From 0616250a44fb6350321cc0c25fe6ddab13ebb5a0 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Tue, 14 Mar 2023 10:48:33 -0700 Subject: [PATCH 55/71] handle error --- .../connector_builder_handler.py | 28 +++--- .../test_connector_builder_handler.py | 91 +++++++++++++++---- 2 files changed, 90 insertions(+), 29 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index e20888ae64c9..2bbbbb3c74a2 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -21,18 +21,22 @@ def list_streams() -> AirbyteMessage: def read_stream(source: DeclarativeSource, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog) -> AirbyteMessage: - command_config = config["__test_read_config"] - max_pages_per_slice = command_config["max_pages_per_slice"] - max_slices = command_config["max_slices"] - max_records = command_config["max_records"] - handler = MessageGrouper(max_pages_per_slice, max_slices) - stream_name = configured_catalog.streams[0].stream # The connector builder only supports a single stream - stream_read = handler.get_message_groups(source, config, configured_catalog, max_records) - return AirbyteMessage(type=MessageType.RECORD, record=AirbyteRecordMessage( - data=dataclasses.asdict(stream_read), - stream=stream_name, - emitted_at=_emitted_at() - )) + try: + command_config = config["__test_read_config"] + max_pages_per_slice = command_config["max_pages_per_slice"] + max_slices = command_config["max_slices"] + max_records = command_config["max_records"] + handler = MessageGrouper(max_pages_per_slice, max_slices) + stream_name = configured_catalog.streams[0].stream # The connector builder only supports a single stream + stream_read = handler.get_message_groups(source, config, configured_catalog, max_records) + return AirbyteMessage(type=MessageType.RECORD, record=AirbyteRecordMessage( + data=dataclasses.asdict(stream_read), + stream=stream_name, + emitted_at=_emitted_at() + )) + except Exception as exc: + error = AirbyteTracedException.from_exception(exc, message=f"Error reading stream with catalog={configured_catalog}") + return error.as_airbyte_message() def resolve_manifest(source: ManifestDeclarativeSource) -> AirbyteMessage: diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py index c2b36d41d1b8..6d59dcc2cf1c 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py @@ -54,11 +54,21 @@ "check": {"type": "CheckStream", "stream_names": ["lists"]}, } -CONFIG = { +RESOLVE_MANIFEST_CONFIG = { "__injected_declarative_manifest": MANIFEST, "__command": "resolve_manifest", } +TEST_READ_CONFIG = { + "__injected_declarative_manifest": MANIFEST, + "__command": "read", + "__test_read_config": { + "max_pages_per_slice": 2, + "max_slices": 5, + "max_records": 10 + } +} + DUMMY_CATALOG = { "streams": [ { @@ -78,11 +88,36 @@ ] } +CONFIGURED_CATALOG = { + "streams": [ + { + "stream": { + "name": _stream_name, + "json_schema": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": {} + }, + "supported_sync_modes": ["full_refresh"], + "source_defined_cursor": False + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} + + +@pytest.fixture +def valid_resolve_manifest_config_file(tmp_path): + config_file = tmp_path / "config.json" + config_file.write_text(json.dumps(RESOLVE_MANIFEST_CONFIG)) + return config_file @pytest.fixture -def valid_config_file(tmp_path): +def valid_read_config_file(tmp_path): config_file = tmp_path / "config.json" - config_file.write_text(json.dumps(CONFIG)) + config_file.write_text(json.dumps(TEST_READ_CONFIG)) return config_file @pytest.fixture @@ -91,24 +126,35 @@ def dummy_catalog(tmp_path): config_file.write_text(json.dumps(DUMMY_CATALOG)) return config_file +@pytest.fixture +def configured_catalog(tmp_path): + config_file = tmp_path / "catalog.json" + config_file.write_text(json.dumps(CONFIGURED_CATALOG)) + return config_file + @pytest.fixture def invalid_config_file(tmp_path): - invalid_config = copy.deepcopy(CONFIG) + invalid_config = copy.deepcopy(RESOLVE_MANIFEST_CONFIG) invalid_config["__command"] = "bad_command" config_file = tmp_path / "config.json" config_file.write_text(json.dumps(invalid_config)) return config_file -def test_handle_resolve_manifest(valid_config_file, dummy_catalog): +def test_handle_resolve_manifest(valid_resolve_manifest_config_file, dummy_catalog): with mock.patch.object(connector_builder.main, "handle_connector_builder_request") as patch: - handle_request(["read", "--config", str(valid_config_file), "--catalog", str(dummy_catalog)]) + handle_request(["read", "--config", str(valid_resolve_manifest_config_file), "--catalog", str(dummy_catalog)]) assert patch.call_count == 1 +def test_handle_test_read(valid_read_config_file, configured_catalog): + with mock.patch.object(connector_builder.main, "handle_connector_builder_request") as patch: + handle_request(["read", "--config", str(valid_read_config_file), "--catalog", str(configured_catalog)]) + assert patch.call_count == 1 -def test_resolve_manifest(valid_config_file): - config = copy.deepcopy(CONFIG) + +def test_resolve_manifest(valid_resolve_manifest_config_file): + config = copy.deepcopy(RESOLVE_MANIFEST_CONFIG) config["__command"] = "resolve_manifest" source = ManifestDeclarativeSource(MANIFEST) resolved_manifest = handle_connector_builder_request(source, config, create_configured_catalog("dummy_stream")) @@ -239,6 +285,17 @@ def resolved_manifest(self): response = resolve_manifest(source) assert "Error resolving manifest" in response.trace.error.message +def test_read(): + assert False + +def test_read_returns_error_response(): + class MockManifestDeclarativeSource: + def read(self, logger, config, catalog, state): + raise ValueError + + source = MockManifestDeclarativeSource() + response = read_stream(source, TEST_READ_CONFIG, ConfiguredAirbyteCatalog.parse_obj(CONFIGURED_CATALOG)) + assert "Error reading" in response.trace.error.message @pytest.mark.parametrize( "command", @@ -250,26 +307,26 @@ def resolved_manifest(self): pytest.param("", id="test_command_is_empty_error"), ], ) -def test_invalid_protocol_command(command, valid_config_file): - config = copy.deepcopy(CONFIG) +def test_invalid_protocol_command(command, valid_resolve_manifest_config_file): + config = copy.deepcopy(RESOLVE_MANIFEST_CONFIG) config["__command"] = "list_streams" with pytest.raises(SystemExit): - handle_request([command, "--config", str(valid_config_file), "--catalog", ""]) + handle_request([command, "--config", str(valid_resolve_manifest_config_file), "--catalog", ""]) -def test_missing_command(valid_config_file): +def test_missing_command(valid_resolve_manifest_config_file): with pytest.raises(SystemExit): - handle_request(["--config", str(valid_config_file), "--catalog", ""]) + handle_request(["--config", str(valid_resolve_manifest_config_file), "--catalog", ""]) -def test_missing_catalog(valid_config_file): +def test_missing_catalog(valid_resolve_manifest_config_file): with pytest.raises(SystemExit): - handle_request(["read", "--config", str(valid_config_file)]) + handle_request(["read", "--config", str(valid_resolve_manifest_config_file)]) -def test_missing_config(valid_config_file): +def test_missing_config(valid_resolve_manifest_config_file): with pytest.raises(SystemExit): - handle_request(["read", "--catalog", str(valid_config_file)]) + handle_request(["read", "--catalog", str(valid_resolve_manifest_config_file)]) def test_invalid_config_command(invalid_config_file, dummy_catalog): From fa491f7fd1bf4522256030acf0ec8f604478bc48 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Tue, 14 Mar 2023 11:29:15 -0700 Subject: [PATCH 56/71] tests pass --- .../connector_builder_handler.py | 2 +- .../test_connector_builder_handler.py | 34 ++++++++++++++++--- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index 2bbbbb3c74a2..94e6d0ddb8f7 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -27,7 +27,7 @@ def read_stream(source: DeclarativeSource, config: Mapping[str, Any], configured max_slices = command_config["max_slices"] max_records = command_config["max_records"] handler = MessageGrouper(max_pages_per_slice, max_slices) - stream_name = configured_catalog.streams[0].stream # The connector builder only supports a single stream + stream_name = configured_catalog.streams[0].stream.name # The connector builder only supports a single stream stream_read = handler.get_message_groups(source, config, configured_catalog, max_records) return AirbyteMessage(type=MessageType.RECORD, record=AirbyteRecordMessage( data=dataclasses.asdict(stream_read), diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py index 6d59dcc2cf1c..619ee8268e67 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py @@ -3,7 +3,8 @@ # import copy -from unittest.mock import patch +import dataclasses +from unittest.mock import patch, MagicMock import json from unittest import mock @@ -13,9 +14,10 @@ from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from connector_builder.connector_builder_handler import resolve_manifest from connector_builder.main import handle_connector_builder_request, read_stream -from connector_builder.models import StreamRead +from connector_builder.models import StreamRead, StreamReadSlicesInner, StreamReadSlicesInnerPagesInner from unit_tests.connector_builder.utils import create_configured_catalog -from airbyte_cdk.models import ConfiguredAirbyteCatalog, AirbyteMessage +from airbyte_cdk.models import ConfiguredAirbyteCatalog, AirbyteMessage, AirbyteRecordMessage +from airbyte_cdk.models import Type as MessageType from connector_builder.main import handle_connector_builder_request, handle_request _stream_name = "stream_with_custom_requester" @@ -114,18 +116,21 @@ def valid_resolve_manifest_config_file(tmp_path): config_file.write_text(json.dumps(RESOLVE_MANIFEST_CONFIG)) return config_file + @pytest.fixture def valid_read_config_file(tmp_path): config_file = tmp_path / "config.json" config_file.write_text(json.dumps(TEST_READ_CONFIG)) return config_file + @pytest.fixture def dummy_catalog(tmp_path): config_file = tmp_path / "catalog.json" config_file.write_text(json.dumps(DUMMY_CATALOG)) return config_file + @pytest.fixture def configured_catalog(tmp_path): config_file = tmp_path / "catalog.json" @@ -147,6 +152,7 @@ def test_handle_resolve_manifest(valid_resolve_manifest_config_file, dummy_catal handle_request(["read", "--config", str(valid_resolve_manifest_config_file), "--catalog", str(dummy_catalog)]) assert patch.call_count == 1 + def test_handle_test_read(valid_read_config_file, configured_catalog): with mock.patch.object(connector_builder.main, "handle_connector_builder_request") as patch: handle_request(["read", "--config", str(valid_read_config_file), "--catalog", str(configured_catalog)]) @@ -285,8 +291,27 @@ def resolved_manifest(self): response = resolve_manifest(source) assert "Error resolving manifest" in response.trace.error.message + def test_read(): - assert False + config = TEST_READ_CONFIG + source = ManifestDeclarativeSource(MANIFEST) + + real_record = AirbyteRecordMessage(data={"id": "1234", "key": "value"}, emitted_at=1, stream=_stream_name) + stream_read = StreamRead(logs = [{"message": "here be a log message"}], + slices=[StreamReadSlicesInner(pages=[ + StreamReadSlicesInnerPagesInner(records=[real_record], request=None, response=None)], + slice_descriptor=None, state=None) + ], + test_read_limit_reached=False, inferred_schema=None) + + expected_airbyte_message = AirbyteMessage(type=MessageType.RECORD, + record=AirbyteRecordMessage(stream=_stream_name, data=dataclasses.asdict(stream_read), emitted_at=1)) + with patch("connector_builder.message_grouper.MessageGrouper.get_message_groups", return_value=stream_read) as mock_message_grouper: + output_record = handle_connector_builder_request(source, config, ConfiguredAirbyteCatalog.parse_obj(CONFIGURED_CATALOG)) + output_record.record.emitted_at = 1 + assert output_record == expected_airbyte_message + + def test_read_returns_error_response(): class MockManifestDeclarativeSource: @@ -297,6 +322,7 @@ def read(self, logger, config, catalog, state): response = read_stream(source, TEST_READ_CONFIG, ConfiguredAirbyteCatalog.parse_obj(CONFIGURED_CATALOG)) assert "Error reading" in response.trace.error.message + @pytest.mark.parametrize( "command", [ From 1e049047b1d31125d9b11d991e6884fc4eb8aded Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Tue, 14 Mar 2023 11:31:41 -0700 Subject: [PATCH 57/71] more explicit test --- .../test_connector_builder_handler.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py index 619ee8268e67..e2c612126cb4 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py @@ -305,8 +305,17 @@ def test_read(): test_read_limit_reached=False, inferred_schema=None) expected_airbyte_message = AirbyteMessage(type=MessageType.RECORD, - record=AirbyteRecordMessage(stream=_stream_name, data=dataclasses.asdict(stream_read), emitted_at=1)) - with patch("connector_builder.message_grouper.MessageGrouper.get_message_groups", return_value=stream_read) as mock_message_grouper: + record=AirbyteRecordMessage(stream=_stream_name, data={ + "logs": [{"message": "here be a log message"}], + "slices": [{ + "pages": [{"records": [real_record], "request": None, "response": None}], + "slice_descriptor": None, + "state": None + }], + "test_read_limit_reached": False, + "inferred_schema": None + }, emitted_at=1)) + with patch("connector_builder.message_grouper.MessageGrouper.get_message_groups", return_value=stream_read): output_record = handle_connector_builder_request(source, config, ConfiguredAirbyteCatalog.parse_obj(CONFIGURED_CATALOG)) output_record.record.emitted_at = 1 assert output_record == expected_airbyte_message From aa2839ccc68c1d1194045b362bf152bd69e148cf Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Tue, 14 Mar 2023 11:32:57 -0700 Subject: [PATCH 58/71] reset --- .../test_source_declarative_manifest.py | 40 +++++++++++++++---- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py b/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py index aa762395f4dc..63757dda03f5 100644 --- a/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py +++ b/airbyte-cdk/python/unit_tests/test_source_declarative_manifest.py @@ -13,11 +13,35 @@ "__injected_declarative_manifest": { "version": "0.1.0", "definitions": { - "selector": {"extractor": {"field_path": []}}, - "requester": {"url_base": "https://test.com/api", "http_method": "GET"}, - "retriever": {"record_selector": {"$ref": "#/definitions/selector"}, "requester": {"$ref": "#/definitions/requester"}}, - "base_stream": {"retriever": {"$ref": "#/definitions/retriever"}}, - "data_stream": {"$ref": "#/definitions/base_stream", "$parameters": {"name": "data", "path": "/data"}}, + "selector": { + "extractor": { + "field_path": [] + } + }, + "requester": { + "url_base": "https://test.com/api", + "http_method": "GET" + }, + "retriever": { + "record_selector": { + "$ref": "#/definitions/selector" + }, + "requester": { + "$ref": "#/definitions/requester" + } + }, + "base_stream": { + "retriever": { + "$ref": "#/definitions/retriever" + } + }, + "data_stream": { + "$ref": "#/definitions/base_stream", + "$parameters": { + "name": "data", + "path": "/data" + } + }, }, "streams": [ "#/definitions/data_stream", @@ -35,9 +59,9 @@ "title": "Test Spec", "type": "object", "additionalProperties": True, - "properties": {}, - }, - }, + "properties": {} + } + } } } From 5d3163f16cabcf16987ea40b6353b515503dff6b Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Tue, 14 Mar 2023 11:34:03 -0700 Subject: [PATCH 59/71] format --- airbyte-cdk/python/connector_builder/main.py | 4 +-- .../test_connector_builder_handler.py | 33 +++++++++---------- 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/main.py b/airbyte-cdk/python/connector_builder/main.py index b88c0f36071d..2ecfb8e16b30 100644 --- a/airbyte-cdk/python/connector_builder/main.py +++ b/airbyte-cdk/python/connector_builder/main.py @@ -8,9 +8,9 @@ from airbyte_cdk.connector import BaseConnector from airbyte_cdk.entrypoint import AirbyteEntrypoint -from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource -from connector_builder.connector_builder_handler import resolve_manifest, read_stream from airbyte_cdk.models import ConfiguredAirbyteCatalog +from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource +from connector_builder.connector_builder_handler import read_stream, resolve_manifest def create_source(config: Mapping[str, Any], debug: bool) -> ManifestDeclarativeSource: diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py index e2c612126cb4..82d38e9ff3c0 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py @@ -4,21 +4,19 @@ import copy import dataclasses -from unittest.mock import patch, MagicMock import json from unittest import mock +from unittest.mock import MagicMock, patch import connector_builder import pytest - +from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage, ConfiguredAirbyteCatalog +from airbyte_cdk.models import Type as MessageType from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from connector_builder.connector_builder_handler import resolve_manifest -from connector_builder.main import handle_connector_builder_request, read_stream +from connector_builder.main import handle_connector_builder_request, handle_request, read_stream from connector_builder.models import StreamRead, StreamReadSlicesInner, StreamReadSlicesInnerPagesInner from unit_tests.connector_builder.utils import create_configured_catalog -from airbyte_cdk.models import ConfiguredAirbyteCatalog, AirbyteMessage, AirbyteRecordMessage -from airbyte_cdk.models import Type as MessageType -from connector_builder.main import handle_connector_builder_request, handle_request _stream_name = "stream_with_custom_requester" _stream_primary_key = "id" @@ -297,7 +295,7 @@ def test_read(): source = ManifestDeclarativeSource(MANIFEST) real_record = AirbyteRecordMessage(data={"id": "1234", "key": "value"}, emitted_at=1, stream=_stream_name) - stream_read = StreamRead(logs = [{"message": "here be a log message"}], + stream_read = StreamRead(logs=[{"message": "here be a log message"}], slices=[StreamReadSlicesInner(pages=[ StreamReadSlicesInnerPagesInner(records=[real_record], request=None, response=None)], slice_descriptor=None, state=None) @@ -305,23 +303,22 @@ def test_read(): test_read_limit_reached=False, inferred_schema=None) expected_airbyte_message = AirbyteMessage(type=MessageType.RECORD, - record=AirbyteRecordMessage(stream=_stream_name, data={ - "logs": [{"message": "here be a log message"}], - "slices": [{ - "pages": [{"records": [real_record], "request": None, "response": None}], - "slice_descriptor": None, - "state": None - }], - "test_read_limit_reached": False, - "inferred_schema": None - }, emitted_at=1)) + record=AirbyteRecordMessage(stream=_stream_name, data={ + "logs": [{"message": "here be a log message"}], + "slices": [{ + "pages": [{"records": [real_record], "request": None, "response": None}], + "slice_descriptor": None, + "state": None + }], + "test_read_limit_reached": False, + "inferred_schema": None + }, emitted_at=1)) with patch("connector_builder.message_grouper.MessageGrouper.get_message_groups", return_value=stream_read): output_record = handle_connector_builder_request(source, config, ConfiguredAirbyteCatalog.parse_obj(CONFIGURED_CATALOG)) output_record.record.emitted_at = 1 assert output_record == expected_airbyte_message - def test_read_returns_error_response(): class MockManifestDeclarativeSource: def read(self, logger, config, catalog, state): From 9cd9105a60bf965ae9226e79e81d3179b31b171a Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Tue, 14 Mar 2023 11:48:07 -0700 Subject: [PATCH 60/71] fix merge --- .../test_connector_builder_handler.py | 35 ------------------- 1 file changed, 35 deletions(-) diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py index 56acda014319..216d2afab9f8 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py @@ -62,17 +62,11 @@ "check": {"type": "CheckStream", "stream_names": ["lists"]}, } -<<<<<<< HEAD RESOLVE_MANIFEST_CONFIG = { -======= - -CONFIG = { ->>>>>>> master "__injected_declarative_manifest": MANIFEST, "__command": "resolve_manifest", } -<<<<<<< HEAD TEST_READ_CONFIG = { "__injected_declarative_manifest": MANIFEST, "__command": "read", @@ -147,30 +141,18 @@ def dummy_catalog(tmp_path): def configured_catalog(tmp_path): config_file = tmp_path / "catalog.json" config_file.write_text(json.dumps(CONFIGURED_CATALOG)) -======= - -@pytest.fixture -def valid_config_file(tmp_path): - config_file = tmp_path / "config.json" - config_file.write_text(json.dumps(CONFIG)) ->>>>>>> master return config_file @pytest.fixture def invalid_config_file(tmp_path): -<<<<<<< HEAD invalid_config = copy.deepcopy(RESOLVE_MANIFEST_CONFIG) -======= - invalid_config = copy.deepcopy(CONFIG) ->>>>>>> master invalid_config["__command"] = "bad_command" config_file = tmp_path / "config.json" config_file.write_text(json.dumps(invalid_config)) return config_file -<<<<<<< HEAD def test_handle_resolve_manifest(valid_resolve_manifest_config_file, dummy_catalog): with mock.patch.object(connector_builder.main, "handle_connector_builder_request") as patch: handle_request(["read", "--config", str(valid_resolve_manifest_config_file), "--catalog", str(dummy_catalog)]) @@ -188,19 +170,6 @@ def test_resolve_manifest(valid_resolve_manifest_config_file): config["__command"] = "resolve_manifest" source = ManifestDeclarativeSource(MANIFEST) resolved_manifest = handle_connector_builder_request(source, config, create_configured_catalog("dummy_stream")) -======= -def test_handle_resolve_manifest(valid_config_file): - with mock.patch.object(connector_builder.main, "handle_connector_builder_request") as patch: - handle_request(["read", "--config", str(valid_config_file), "--catalog", ""]) - assert patch.call_count == 1 - - -def test_resolve_manifest(valid_config_file): - config = copy.deepcopy(CONFIG) - config["__command"] = "resolve_manifest" - source = ManifestDeclarativeSource(MANIFEST) - resolved_manifest = handle_connector_builder_request(source, config) ->>>>>>> master expected_resolved_manifest = { "type": "DeclarativeSource", @@ -329,7 +298,6 @@ def resolved_manifest(self): assert "Error resolving manifest" in response.trace.error.message -<<<<<<< HEAD def test_read(): config = TEST_READ_CONFIG source = ManifestDeclarativeSource(MANIFEST) @@ -369,8 +337,6 @@ def read(self, logger, config, catalog, state): assert "Error reading" in response.trace.error.message -======= ->>>>>>> master @pytest.mark.parametrize( "command", [ @@ -381,7 +347,6 @@ def read(self, logger, config, catalog, state): pytest.param("", id="test_command_is_empty_error"), ], ) -<<<<<<< HEAD def test_invalid_protocol_command(command, valid_resolve_manifest_config_file): config = copy.deepcopy(RESOLVE_MANIFEST_CONFIG) config["__command"] = "list_streams" From e37851e0c5b054e608b61c04e7e7cba7a6e096c0 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Tue, 14 Mar 2023 11:49:53 -0700 Subject: [PATCH 61/71] raise exception --- airbyte-cdk/python/connector_builder/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/airbyte-cdk/python/connector_builder/main.py b/airbyte-cdk/python/connector_builder/main.py index 36a325e452e1..00471e191fda 100644 --- a/airbyte-cdk/python/connector_builder/main.py +++ b/airbyte-cdk/python/connector_builder/main.py @@ -54,7 +54,8 @@ def handle_request(args: List[str]): if "__command" in config: source = create_source(config, True) print(handle_connector_builder_request(source, config, catalog)) - return config + else: + raise ValueError("Missing __command argument in config file.") if __name__ == "__main__": From 0b809f98917e90b1643f39e2b157d405e349c7ec Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Tue, 14 Mar 2023 11:57:44 -0700 Subject: [PATCH 62/71] fix --- .../connector_builder_handler.py | 15 +++++++++++---- airbyte-cdk/python/connector_builder/main.py | 2 +- .../test_connector_builder_handler.py | 2 +- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index 15b8f548a290..80605868e2a1 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -19,12 +19,19 @@ def list_streams() -> AirbyteMessage: raise NotImplementedError +DEFAULT_MAXIMUM_NUMBER_OF_PAGES_PER_SLICE = 5 +DEFAULT_MAXIMUM_NUMBER_OF_SLICES = 5 +DEFAULT_MAX_RECORDS = 100 + + def read_stream(source: DeclarativeSource, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog) -> AirbyteMessage: try: - command_config = config["__test_read_config"] - max_pages_per_slice = command_config["max_pages_per_slice"] - max_slices = command_config["max_slices"] - max_records = command_config["max_records"] + if "__test_read_config" not in config: + raise ValueError("Missing __test_read_config field in config file") + command_config = config.get("__test_read_config", {}) + max_pages_per_slice = command_config.get("max_pages_per_slice", DEFAULT_MAXIMUM_NUMBER_OF_PAGES_PER_SLICE) + max_slices = command_config.get("max_slices", DEFAULT_MAXIMUM_NUMBER_OF_SLICES) + max_records = command_config.get("max_records", DEFAULT_MAX_RECORDS) handler = MessageGrouper(max_pages_per_slice, max_slices) stream_name = configured_catalog.streams[0].stream.name # The connector builder only supports a single stream stream_read = handler.get_message_groups(source, config, configured_catalog, max_records) diff --git a/airbyte-cdk/python/connector_builder/main.py b/airbyte-cdk/python/connector_builder/main.py index 00471e191fda..59f9c43dc122 100644 --- a/airbyte-cdk/python/connector_builder/main.py +++ b/airbyte-cdk/python/connector_builder/main.py @@ -43,7 +43,7 @@ def handle_connector_builder_request(source: ManifestDeclarativeSource, config: command = config.get("__command") if command == "resolve_manifest": return resolve_manifest(source) - elif command == "read": + elif command == "test_read": return read_stream(source, config, catalog) else: raise ValueError(f"Unrecognized command {command}.") diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py index 216d2afab9f8..acfd95d37b5d 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py @@ -69,7 +69,7 @@ TEST_READ_CONFIG = { "__injected_declarative_manifest": MANIFEST, - "__command": "read", + "__command": "test_read", "__test_read_config": { "max_pages_per_slice": 2, "max_slices": 5, From 33022c92f5d95e1a5efe54a8b803a960616ea1a3 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Tue, 14 Mar 2023 12:01:22 -0700 Subject: [PATCH 63/71] black format --- .../connector_builder/connector_builder_handler.py | 4 ++-- airbyte-cdk/python/connector_builder/main.py | 10 +++------- .../test_connector_builder_handler.py | 11 +---------- 3 files changed, 6 insertions(+), 19 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index 80605868e2a1..777930754e5d 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -6,8 +6,8 @@ from datetime import datetime from typing import Any, Mapping -from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage, Type -from airbyte_cdk.models import ConfiguredAirbyteCatalog +from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage, ConfiguredAirbyteCatalog +from airbyte_cdk.models import Type from airbyte_cdk.models import Type as MessageType from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource diff --git a/airbyte-cdk/python/connector_builder/main.py b/airbyte-cdk/python/connector_builder/main.py index 59f9c43dc122..f858b5380c54 100644 --- a/airbyte-cdk/python/connector_builder/main.py +++ b/airbyte-cdk/python/connector_builder/main.py @@ -6,16 +6,12 @@ import sys from typing import Any, List, Mapping, Tuple -from airbyte_cdk.models import ConfiguredAirbyteCatalog -from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource -from connector_builder.connector_builder_handler import read_stream - -from typing import Any, List, Mapping - from airbyte_cdk.connector import BaseConnector from airbyte_cdk.entrypoint import AirbyteEntrypoint +from airbyte_cdk.models import ConfiguredAirbyteCatalog from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource -from connector_builder.connector_builder_handler import resolve_manifest +from connector_builder.connector_builder_handler import read_stream, resolve_manifest + def create_source(config: Mapping[str, Any], debug: bool) -> ManifestDeclarativeSource: manifest = config.get("__injected_declarative_manifest") diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py index acfd95d37b5d..812ded072b4c 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py @@ -3,10 +3,9 @@ # import copy -import dataclasses import json from unittest import mock -from unittest.mock import MagicMock, patch +from unittest.mock import patch import connector_builder import pytest @@ -17,14 +16,6 @@ from connector_builder.main import handle_connector_builder_request, handle_request, read_stream from connector_builder.models import StreamRead, StreamReadSlicesInner, StreamReadSlicesInnerPagesInner from unit_tests.connector_builder.utils import create_configured_catalog -import json -from unittest import mock - -import connector_builder -import pytest -from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource -from connector_builder.connector_builder_handler import resolve_manifest -from connector_builder.main import handle_connector_builder_request, handle_request _stream_name = "stream_with_custom_requester" _stream_primary_key = "id" From cccb96858da53aab6c13e093b9600de55fcd83d3 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Tue, 14 Mar 2023 12:01:56 -0700 Subject: [PATCH 64/71] raise with config --- .../python/connector_builder/connector_builder_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index 777930754e5d..24f2b2ec6eb6 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -41,7 +41,7 @@ def read_stream(source: DeclarativeSource, config: Mapping[str, Any], configured emitted_at=_emitted_at() )) except Exception as exc: - error = AirbyteTracedException.from_exception(exc, message=f"Error reading stream with catalog={configured_catalog}") + error = AirbyteTracedException.from_exception(exc, message=f"Error reading stream with config={config} and catalog={configured_catalog}") return error.as_airbyte_message() From a79b43b7c0f38d739f7c08d6d84fec89d5ace725 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Tue, 14 Mar 2023 12:02:57 -0700 Subject: [PATCH 65/71] update --- airbyte-cdk/python/connector_builder/main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/main.py b/airbyte-cdk/python/connector_builder/main.py index f858b5380c54..784f98216abb 100644 --- a/airbyte-cdk/python/connector_builder/main.py +++ b/airbyte-cdk/python/connector_builder/main.py @@ -13,9 +13,9 @@ from connector_builder.connector_builder_handler import read_stream, resolve_manifest -def create_source(config: Mapping[str, Any], debug: bool) -> ManifestDeclarativeSource: +def create_source(config: Mapping[str, Any]) -> ManifestDeclarativeSource: manifest = config.get("__injected_declarative_manifest") - return ManifestDeclarativeSource(manifest, debug) + return ManifestDeclarativeSource(manifest, True) def get_config_and_catalog_from_args(args: List[str]) -> Tuple[Mapping[str, Any], ConfiguredAirbyteCatalog]: @@ -48,7 +48,7 @@ def handle_connector_builder_request(source: ManifestDeclarativeSource, config: def handle_request(args: List[str]): config, catalog = get_config_and_catalog_from_args(args) if "__command" in config: - source = create_source(config, True) + source = create_source(config) print(handle_connector_builder_request(source, config, catalog)) else: raise ValueError("Missing __command argument in config file.") From 4ace71b0196ca8bcdf2d86d579ac57b0c5fc7aa8 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Tue, 14 Mar 2023 12:18:36 -0700 Subject: [PATCH 66/71] fix flake --- airbyte-cdk/python/connector_builder/main.py | 4 ++-- .../python/connector_builder/message_grouper.py | 4 ++-- airbyte-cdk/python/connector_builder/models.py | 12 ++++++++++-- .../connector_builder/test_message_grouper.py | 2 +- 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/main.py b/airbyte-cdk/python/connector_builder/main.py index 784f98216abb..09df2255cfe1 100644 --- a/airbyte-cdk/python/connector_builder/main.py +++ b/airbyte-cdk/python/connector_builder/main.py @@ -24,8 +24,8 @@ def get_config_and_catalog_from_args(args: List[str]) -> Tuple[Mapping[str, Any] if parsed_args.command != "read": raise ValueError("Only read commands are allowed for Connector Builder requests.") - config = BaseConnector.read_config(parsed_args.config) - catalog = ConfiguredAirbyteCatalog.parse_obj(BaseConnector.read_config(parsed_args.catalog)) + config = BaseConnector.read_config(config_path) + catalog = ConfiguredAirbyteCatalog.parse_obj(BaseConnector.read_config(catalog_path)) if "__injected_declarative_manifest" not in config: raise ValueError( diff --git a/airbyte-cdk/python/connector_builder/message_grouper.py b/airbyte-cdk/python/connector_builder/message_grouper.py index 50213ab792a3..9c0482adfe09 100644 --- a/airbyte-cdk/python/connector_builder/message_grouper.py +++ b/airbyte-cdk/python/connector_builder/message_grouper.py @@ -41,7 +41,7 @@ def get_message_groups(self, slices = [] log_messages = [] - state = {} # No support for incremental sync + state = {} # No support for incremental sync for message_group in self._get_message_groups( source.read(self.logger, config, configured_catalog, state), schema_inferrer, @@ -56,7 +56,7 @@ def get_message_groups(self, logs=log_messages, slices=slices, test_read_limit_reached=self._has_reached_limit(slices), - inferred_schema=schema_inferrer.get_stream_schema(configured_catalog.streams[0].stream.name) # The connector builder currently only supports reading from a single stream at a time + inferred_schema=schema_inferrer.get_stream_schema(configured_catalog.streams[0].stream.name) # The connector builder currently only supports reading from a single stream at a time ) def _get_message_groups( diff --git a/airbyte-cdk/python/connector_builder/models.py b/airbyte-cdk/python/connector_builder/models.py index 732f5816c498..e866c6a0c21d 100644 --- a/airbyte-cdk/python/connector_builder/models.py +++ b/airbyte-cdk/python/connector_builder/models.py @@ -13,6 +13,7 @@ class HttpResponse: body: Optional[str] = None headers: Optional[Dict[str, Any]] = None + @dataclass class HttpRequest: url: str @@ -20,30 +21,35 @@ class HttpRequest: body: Optional[Dict[str, Any]] headers: Optional[Dict[str, Any]] http_method: str + + @dataclass class StreamReadPages: records: List[object] request: Optional[HttpRequest] = None response: Optional[HttpResponse] = None + @dataclass class StreamReadSlicesInnerPagesInner: - records: List[object] request: Optional[HttpRequest] response: Optional[HttpResponse] + @dataclass class StreamReadSlicesInnerSliceDescriptor: start_datetime: Optional[datetime] list_item: Optional[str] + @dataclass class StreamReadSlicesInner: pages: List[StreamReadSlicesInnerPagesInner] slice_descriptor: Optional[StreamReadSlicesInnerSliceDescriptor] state: Optional[Dict[str, Any]] + @dataclass class StreamRead(object): logs: List[object] @@ -51,6 +57,7 @@ class StreamRead(object): test_read_limit_reached: bool inferred_schema: Optional[Dict[str, Any]] + @dataclass class StreamReadRequestBody: manifest: Dict[str, Any] @@ -59,14 +66,15 @@ class StreamReadRequestBody: state: Optional[Dict[str, Any]] record_limit: Optional[int] + @dataclass class StreamReadSliceDescriptor: start_datetime: Optional[datetime] = None list_item: Optional[str] = None + @dataclass class StreamReadSlices: pages: List[StreamReadPages] slice_descriptor: Optional[StreamReadSliceDescriptor] = None state: Optional[Dict[str, Any]] = None - diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_message_grouper.py b/airbyte-cdk/python/unit_tests/connector_builder/test_message_grouper.py index b12cd281cf62..33615df2970f 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_message_grouper.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_message_grouper.py @@ -375,7 +375,7 @@ def test_get_grouped_messages_invalid_group_format(): api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES) - with pytest.raises(ValueError) as actual_exception: + with pytest.raises(ValueError): api.get_message_groups(source=mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras")) From 455e65aafd151d1809a2cc6cd8d4a13c257b17ae Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Wed, 15 Mar 2023 08:50:03 -0700 Subject: [PATCH 67/71] __test_read_config is optional --- .../python/connector_builder/connector_builder_handler.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/connector_builder/connector_builder_handler.py index 24f2b2ec6eb6..7efc4d9718b0 100644 --- a/airbyte-cdk/python/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/connector_builder/connector_builder_handler.py @@ -26,8 +26,6 @@ def list_streams() -> AirbyteMessage: def read_stream(source: DeclarativeSource, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog) -> AirbyteMessage: try: - if "__test_read_config" not in config: - raise ValueError("Missing __test_read_config field in config file") command_config = config.get("__test_read_config", {}) max_pages_per_slice = command_config.get("max_pages_per_slice", DEFAULT_MAXIMUM_NUMBER_OF_PAGES_PER_SLICE) max_slices = command_config.get("max_slices", DEFAULT_MAXIMUM_NUMBER_OF_SLICES) From 36152a1cb4d65cdb96639b9d9cd0d10080d9181d Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Wed, 15 Mar 2023 15:09:05 -0700 Subject: [PATCH 68/71] fix --- airbyte-cdk/python/connector_builder/main.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/main.py b/airbyte-cdk/python/connector_builder/main.py index 963185a15d71..88d2cac633e0 100644 --- a/airbyte-cdk/python/connector_builder/main.py +++ b/airbyte-cdk/python/connector_builder/main.py @@ -10,12 +10,8 @@ from airbyte_cdk.entrypoint import AirbyteEntrypoint from airbyte_cdk.models import ConfiguredAirbyteCatalog from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource -<<<<<<< HEAD from connector_builder.connector_builder_handler import read_stream, resolve_manifest -======= from airbyte_cdk.utils.traced_exception import AirbyteTracedException -from connector_builder.connector_builder_handler import resolve_manifest ->>>>>>> master def create_source(config: Mapping[str, Any]) -> ManifestDeclarativeSource: From b308f0442f6a54f9d301541a8e66ec1fcf1f7295 Mon Sep 17 00:00:00 2001 From: girarda Date: Wed, 15 Mar 2023 22:14:03 +0000 Subject: [PATCH 69/71] Automated Commit - Formatting Changes --- airbyte-cdk/python/connector_builder/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-cdk/python/connector_builder/main.py b/airbyte-cdk/python/connector_builder/main.py index 88d2cac633e0..18001a15c9d4 100644 --- a/airbyte-cdk/python/connector_builder/main.py +++ b/airbyte-cdk/python/connector_builder/main.py @@ -10,8 +10,8 @@ from airbyte_cdk.entrypoint import AirbyteEntrypoint from airbyte_cdk.models import ConfiguredAirbyteCatalog from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource -from connector_builder.connector_builder_handler import read_stream, resolve_manifest from airbyte_cdk.utils.traced_exception import AirbyteTracedException +from connector_builder.connector_builder_handler import read_stream, resolve_manifest def create_source(config: Mapping[str, Any]) -> ManifestDeclarativeSource: From 40550b34f18e11d1e4aeae0d9ebeb6058e5a0168 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Wed, 15 Mar 2023 15:18:33 -0700 Subject: [PATCH 70/71] fix --- airbyte-cdk/python/connector_builder/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-cdk/python/connector_builder/main.py b/airbyte-cdk/python/connector_builder/main.py index 88d2cac633e0..501d904da510 100644 --- a/airbyte-cdk/python/connector_builder/main.py +++ b/airbyte-cdk/python/connector_builder/main.py @@ -50,7 +50,7 @@ def handle_request(args: List[str]): config, catalog = get_config_and_catalog_from_args(args) if "__command" in config: source = create_source(config) - return handle_connector_builder_request(source, config).json() + return handle_connector_builder_request(source, config, catalog).json() else: raise ValueError("Missing __command argument in config file.") From b1beeb33f80df077487752dabef61a2cd2c5194a Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Wed, 15 Mar 2023 16:59:23 -0700 Subject: [PATCH 71/71] exclude_unset --- airbyte-cdk/python/connector_builder/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte-cdk/python/connector_builder/main.py b/airbyte-cdk/python/connector_builder/main.py index 766c743623be..fa5a7115e11c 100644 --- a/airbyte-cdk/python/connector_builder/main.py +++ b/airbyte-cdk/python/connector_builder/main.py @@ -50,7 +50,7 @@ def handle_request(args: List[str]): config, catalog = get_config_and_catalog_from_args(args) if "__command" in config: source = create_source(config) - return handle_connector_builder_request(source, config, catalog).json() + return handle_connector_builder_request(source, config, catalog).json(exclude_unset=True) else: raise ValueError("Missing __command argument in config file.") @@ -61,4 +61,4 @@ def handle_request(args: List[str]): except Exception as exc: error = AirbyteTracedException.from_exception(exc, message="Error handling request.") m = error.as_airbyte_message() - print(error.as_airbyte_message().json()) + print(error.as_airbyte_message().json(exclude_unset=True))