From a934b684c682dbbb3599ff89e0a34b7c4066cde2 Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Tue, 18 May 2021 11:30:44 +0800 Subject: [PATCH 01/34] Use CDK to generate source that can be configured to emit a certain number of records and always works. --- .../source-always-works/.dockerignore | 7 + .../connectors/source-always-works/Dockerfile | 15 ++ .../connectors/source-always-works/README.md | 129 ++++++++++++++++++ .../acceptance-test-config.yml | 30 ++++ .../acceptance-test-docker.sh | 7 + .../source-always-works/build.gradle | 14 ++ .../integration_tests/__init__.py | 0 .../integration_tests/abnormal_state.json | 5 + .../integration_tests/acceptance.py | 36 +++++ .../integration_tests/catalog.json | 19 +++ .../integration_tests/configured_catalog.json | 23 ++++ .../integration_tests/invalid_config.json | 3 + .../integration_tests/sample_config.json | 3 + .../connectors/source-always-works/main.py | 33 +++++ .../source-always-works/requirements.txt | 2 + .../connectors/source-always-works/setup.py | 48 +++++++ .../source_always_works/__init__.py | 27 ++++ .../schemas/always_works_stream.json | 9 ++ .../source_always_works/source.py | 101 ++++++++++++++ .../source_always_works/spec.json | 16 +++ .../unit_tests/unit_test.py | 27 ++++ 21 files changed, 554 insertions(+) create mode 100644 airbyte-integrations/connectors/source-always-works/.dockerignore create mode 100644 airbyte-integrations/connectors/source-always-works/Dockerfile create mode 100644 airbyte-integrations/connectors/source-always-works/README.md create mode 100644 airbyte-integrations/connectors/source-always-works/acceptance-test-config.yml create mode 100644 airbyte-integrations/connectors/source-always-works/acceptance-test-docker.sh create mode 100644 airbyte-integrations/connectors/source-always-works/build.gradle create mode 100644 airbyte-integrations/connectors/source-always-works/integration_tests/__init__.py create mode 100644 airbyte-integrations/connectors/source-always-works/integration_tests/abnormal_state.json create mode 100644 airbyte-integrations/connectors/source-always-works/integration_tests/acceptance.py create mode 100644 airbyte-integrations/connectors/source-always-works/integration_tests/catalog.json create mode 100644 airbyte-integrations/connectors/source-always-works/integration_tests/configured_catalog.json create mode 100644 airbyte-integrations/connectors/source-always-works/integration_tests/invalid_config.json create mode 100644 airbyte-integrations/connectors/source-always-works/integration_tests/sample_config.json create mode 100644 airbyte-integrations/connectors/source-always-works/main.py create mode 100644 airbyte-integrations/connectors/source-always-works/requirements.txt create mode 100644 airbyte-integrations/connectors/source-always-works/setup.py create mode 100644 airbyte-integrations/connectors/source-always-works/source_always_works/__init__.py create mode 100644 airbyte-integrations/connectors/source-always-works/source_always_works/schemas/always_works_stream.json create mode 100644 airbyte-integrations/connectors/source-always-works/source_always_works/source.py create mode 100644 airbyte-integrations/connectors/source-always-works/source_always_works/spec.json create mode 100644 airbyte-integrations/connectors/source-always-works/unit_tests/unit_test.py diff --git a/airbyte-integrations/connectors/source-always-works/.dockerignore b/airbyte-integrations/connectors/source-always-works/.dockerignore new file mode 100644 index 0000000000000..cdada38cab87d --- /dev/null 
+++ b/airbyte-integrations/connectors/source-always-works/.dockerignore @@ -0,0 +1,7 @@ +* +!Dockerfile +!Dockerfile.test +!main.py +!source_always_works +!setup.py +!secrets diff --git a/airbyte-integrations/connectors/source-always-works/Dockerfile b/airbyte-integrations/connectors/source-always-works/Dockerfile new file mode 100644 index 0000000000000..1dcb4a16ccac3 --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/Dockerfile @@ -0,0 +1,15 @@ +FROM python:3.7-slim + +# Bash is installed for more convenient debugging. +RUN apt-get update && apt-get install -y bash && rm -rf /var/lib/apt/lists/* + +WORKDIR /airbyte/integration_code +COPY source_always_works ./source_always_works +COPY main.py ./ +COPY setup.py ./ +RUN pip install . + +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/source-always-works diff --git a/airbyte-integrations/connectors/source-always-works/README.md b/airbyte-integrations/connectors/source-always-works/README.md new file mode 100644 index 0000000000000..a82392d4f21ed --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/README.md @@ -0,0 +1,129 @@ +# Always Works Source + +This is the repository for the Always Works source connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/sources/always-works). + +## Local development + +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your +development environment of choice. To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Building via Gradle +You can also build the connector in Gradle. This is typically used in CI and not needed for your development workflow. + +To build using Gradle, from the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:source-always-works:build +``` + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/sources/always-works) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_always_works/spec.json` file. +Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source always-works test creds` +and place them into `secrets/config.json`. 
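+
+For this stub source the spec only asks for a numeric `limit` (the number of records the stream should emit), so a minimal `secrets/config.json` can be created as follows (a sketch; `source_always_works/spec.json` remains the source of truth for the required fields):
+```
+mkdir -p secrets
+echo '{"limit": 1000}' > secrets/config.json
+```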
+ +### Locally running the connector +``` +python main.py spec +python main.py check --config secrets/config.json +python main.py discover --config secrets/config.json +python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json +``` + +### Locally running the connector docker image + +#### Build +First, make sure you build the latest Docker image: +``` +docker build . -t airbyte/source-always-works:dev +``` + +You can also build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:source-always-works:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-always-works:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-always-works:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-always-works:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-always-works:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` +## Testing +Make sure to familiarize yourself with [pytest test discovery](https://docs.pytest.org/en/latest/goodpractices.html#test-discovery) to know how your test files and methods should be named. +First install test dependencies into your virtual environment: +``` +pip install .[tests] +``` +### Unit Tests +To run unit tests locally, from the connector directory run: +``` +python -m pytest unit_tests +``` + +### Integration Tests +There are two types of integration tests: Acceptance Tests (Airbyte's test suite for all source connectors) and custom integration tests (which are specific to this connector). +#### Custom Integration tests +Place custom tests inside the `integration_tests/` folder, then, from the connector root, run +``` +python -m pytest integration_tests +``` +#### Acceptance Tests +Customize the `acceptance-test-config.yml` file to configure the tests. See [Source Acceptance Tests](source-acceptance-tests.md) for more information. +If your connector requires creating or destroying resources for use during acceptance tests, create fixtures for them and place them inside `integration_tests/acceptance.py`. +To run your integration tests with acceptance tests, from the connector root, run +``` +python -m pytest integration_tests -p integration_tests.acceptance +``` +To run the acceptance tests in a Docker container instead, use the `acceptance-test-docker.sh` script in the connector root. + +### Using gradle to run tests +All commands should be run from the Airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:source-always-works:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:source-always-works:integrationTest +``` + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. +We split dependencies between two groups, dependencies that are: +* required for your connector to work need to go to `MAIN_REQUIREMENTS` list.
+* required for the testing need to go to `TEST_REQUIREMENTS` list + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. diff --git a/airbyte-integrations/connectors/source-always-works/acceptance-test-config.yml b/airbyte-integrations/connectors/source-always-works/acceptance-test-config.yml new file mode 100644 index 0000000000000..21baf0487cce1 --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/acceptance-test-config.yml @@ -0,0 +1,30 @@ +# See [Source Acceptance Tests](https://docs.airbyte.io/contributing-to-airbyte/building-new-connector/source-acceptance-tests.md) +# for more information about how to configure these tests +connector_image: airbyte/source-always-works:dev +tests: + spec: + - spec_path: "source_always_works/spec.json" + connection: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "exception" + discovery: + - config_path: "secrets/config.json" + basic_read: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + validate_output_from_all_streams: yes +# TODO uncomment this block to specify that the tests should assert the connector outputs the records provided in the input file a file +# expect_records: +# path: "integration_tests/expected_records.txt" +# extra_fields: no +# exact_order: no +# extra_records: yes + incremental: # TODO if your connector does not implement incremental sync, remove this block + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + state_path: "integration_tests/abnormal_state.json" + full_refresh: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-always-works/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-always-works/acceptance-test-docker.sh new file mode 100644 index 0000000000000..1425ff74f1511 --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/acceptance-test-docker.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env sh +docker run --rm -it \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v /tmp:/tmp \ + -v $(pwd):/test_input \ + airbyte/source-acceptance-test \ + --acceptance-test-config /test_input diff --git a/airbyte-integrations/connectors/source-always-works/build.gradle b/airbyte-integrations/connectors/source-always-works/build.gradle new file mode 100644 index 0000000000000..f96068f38d70d --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/build.gradle @@ -0,0 +1,14 @@ +plugins { + id 'airbyte-python' + id 'airbyte-docker' + id 'airbyte-source-acceptance-test' +} + +airbytePython { + moduleDirectory 'source_always_works' +} + +dependencies { + implementation files(project(':airbyte-integrations:bases:source-acceptance-test').airbyteDocker.outputs) + implementation 
files(project(':airbyte-integrations:bases:base-python').airbyteDocker.outputs) +} diff --git a/airbyte-integrations/connectors/source-always-works/integration_tests/__init__.py b/airbyte-integrations/connectors/source-always-works/integration_tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/airbyte-integrations/connectors/source-always-works/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-always-works/integration_tests/abnormal_state.json new file mode 100644 index 0000000000000..52b0f2c2118f4 --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/integration_tests/abnormal_state.json @@ -0,0 +1,5 @@ +{ + "todo-stream-name": { + "todo-field-name": "todo-abnormal-value" + } +} diff --git a/airbyte-integrations/connectors/source-always-works/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-always-works/integration_tests/acceptance.py new file mode 100644 index 0000000000000..eeb4a2d3e02e5 --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/integration_tests/acceptance.py @@ -0,0 +1,36 @@ +# +# MIT License +# +# Copyright (c) 2020 Airbyte +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# + + +import pytest + +pytest_plugins = ("source_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """ This fixture is a placeholder for external resources that acceptance test might require.""" + # TODO: setup test dependencies if needed. 
otherwise remove the TODO comments + yield + # TODO: clean up test dependencies diff --git a/airbyte-integrations/connectors/source-always-works/integration_tests/catalog.json b/airbyte-integrations/connectors/source-always-works/integration_tests/catalog.json new file mode 100644 index 0000000000000..23da72ab1a5cf --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/integration_tests/catalog.json @@ -0,0 +1,19 @@ +{ + "streams": [ + { + "name": "always_works_stream", + "supported_sync_modes": ["full_refresh"], + "source_defined_cursor": true, + "default_cursor_field": "stub", + "json_schema": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "count": { + "type": "integer" + } + } + } + } + ] +} diff --git a/airbyte-integrations/connectors/source-always-works/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-always-works/integration_tests/configured_catalog.json new file mode 100644 index 0000000000000..0822a4ac8caa4 --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/integration_tests/configured_catalog.json @@ -0,0 +1,23 @@ +{ + "streams": [ + { + "stream": { + "name": "always_works_stream", + "supported_sync_modes": ["full_refresh"], + "source_defined_cursor": true, + "default_cursor_field": ["stub"], + "json_schema": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "count": { + "type": "integer" + } + } + } + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-always-works/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-always-works/integration_tests/invalid_config.json new file mode 100644 index 0000000000000..f3732995784f2 --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/integration_tests/invalid_config.json @@ -0,0 +1,3 @@ +{ + "todo-wrong-field": "this should be an incomplete config file, used in standard tests" +} diff --git a/airbyte-integrations/connectors/source-always-works/integration_tests/sample_config.json b/airbyte-integrations/connectors/source-always-works/integration_tests/sample_config.json new file mode 100644 index 0000000000000..ecc4913b84c74 --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/integration_tests/sample_config.json @@ -0,0 +1,3 @@ +{ + "fix-me": "TODO" +} diff --git a/airbyte-integrations/connectors/source-always-works/main.py b/airbyte-integrations/connectors/source-always-works/main.py new file mode 100644 index 0000000000000..9ecc637c68849 --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/main.py @@ -0,0 +1,33 @@ +# +# MIT License +# +# Copyright (c) 2020 Airbyte +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# + + +import sys + +from airbyte_cdk.entrypoint import launch +from source_always_works import SourceAlwaysWorks + +if __name__ == "__main__": + source = SourceAlwaysWorks() + launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-always-works/requirements.txt b/airbyte-integrations/connectors/source-always-works/requirements.txt new file mode 100644 index 0000000000000..0411042aa0911 --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/requirements.txt @@ -0,0 +1,2 @@ +-e ../../bases/source-acceptance-test +-e . diff --git a/airbyte-integrations/connectors/source-always-works/setup.py b/airbyte-integrations/connectors/source-always-works/setup.py new file mode 100644 index 0000000000000..52a13a5ad172b --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/setup.py @@ -0,0 +1,48 @@ +# +# MIT License +# +# Copyright (c) 2020 Airbyte +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "airbyte-cdk", +] + +TEST_REQUIREMENTS = [ + "pytest~=6.1", + "source-acceptance-test", +] + +setup( + name="source_always_works", + description="Source implementation for Always Works.", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "schemas/*.json", "schemas/shared/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/source-always-works/source_always_works/__init__.py b/airbyte-integrations/connectors/source-always-works/source_always_works/__init__.py new file mode 100644 index 0000000000000..e5b3b92da7def --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/source_always_works/__init__.py @@ -0,0 +1,27 @@ +""" +MIT License + +Copyright (c) 2020 Airbyte + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+""" + +from .source import SourceAlwaysWorks + +__all__ = ["SourceAlwaysWorks"] diff --git a/airbyte-integrations/connectors/source-always-works/source_always_works/schemas/always_works_stream.json b/airbyte-integrations/connectors/source-always-works/source_always_works/schemas/always_works_stream.json new file mode 100644 index 0000000000000..c450eda71eea2 --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/source_always_works/schemas/always_works_stream.json @@ -0,0 +1,9 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "count": { + "type": "integer" + } + } +} diff --git a/airbyte-integrations/connectors/source-always-works/source_always_works/source.py b/airbyte-integrations/connectors/source-always-works/source_always_works/source.py new file mode 100644 index 0000000000000..8f83cee637ecf --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/source_always_works/source.py @@ -0,0 +1,101 @@ +# +# MIT License +# +# Copyright (c) 2020 Airbyte +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# + + +from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple + +import requests +from airbyte_cdk.models import SyncMode +from airbyte_cdk.sources import AbstractSource +from airbyte_cdk.sources.streams import Stream +from airbyte_cdk.sources.streams.http import HttpStream + + +class AlwaysWorksStream(HttpStream): + + current_counter = 0 # Counter for current response number. + url_base = "stub_base" + primary_key = "stub_key" + + def __init__(self, limit: int): + super().__init__() + self._limit = limit + + # Ignored Functions + def path(self, stream_state: Mapping[str, Any] = None, + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None) -> str: + return "unused" + + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + return None + + def request_params( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None + ) -> MutableMapping[str, Any]: + return {} + + # Used Functions + def read_records( + self, + sync_mode: SyncMode, + stream_state: Mapping[str, Any] = None, + cursor_field: List[str] = None, + stream_slice: Mapping[str, Any] = None, + ) -> Iterable[Mapping[str, Any]]: + """ + Override this to only call the parse_response function. 
+ """ + return self.parse_response(requests.Response()) + + def stream_slices( + self, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None + ) -> Iterable[Optional[Mapping[str, Any]]]: + """ + Return the self._limit number of slices, so the same number of read_record + calls are made. + """ + for i in range(self._limit): + yield i + + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + """ + Returns an incrementing number. + """ + self.current_counter += 1 + yield {"count": self.current_counter} + + +class SourceAlwaysWorks(AbstractSource): + def check_connection(self, logger, config) -> Tuple[bool, any]: + """ + Always works is always working; return True + """ + return True, None + + def streams(self, config: Mapping[str, Any]) -> List[Stream]: + """ + :param config: A Mapping of the user input configuration as defined in the connector spec. + """ + return [AlwaysWorksStream(config['limit'])] diff --git a/airbyte-integrations/connectors/source-always-works/source_always_works/spec.json b/airbyte-integrations/connectors/source-always-works/source_always_works/spec.json new file mode 100644 index 0000000000000..17c8f21a26fcf --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/source_always_works/spec.json @@ -0,0 +1,16 @@ +{ + "documentationUrl": "http://www.stubstub.stub", + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Always Works Spec", + "type": "object", + "required": ["limit"], + "additionalProperties": false, + "properties": { + "limit": { + "type": "number", + "description": "Number of records the Always On Stream will return." + } + } + } +} diff --git a/airbyte-integrations/connectors/source-always-works/unit_tests/unit_test.py b/airbyte-integrations/connectors/source-always-works/unit_tests/unit_test.py new file mode 100644 index 0000000000000..b8a8150b507fd --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/unit_tests/unit_test.py @@ -0,0 +1,27 @@ +# +# MIT License +# +# Copyright (c) 2020 Airbyte +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# + + +def test_example_method(): + assert True From d2ab458bbd7f189ded7f0b774ca211b24d51b070 Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Tue, 18 May 2021 17:18:30 +0800 Subject: [PATCH 02/34] Checkpoint: socat works from inside the docker container. 
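The shim introduced below leaves the CDK connector untouched and simply pipes its stdout into a TCP connection via `socat`. A rough way to observe it from the host side (this listener is just an example; the port only needs to match the 9000 the shim dials):

```
# On the host: accept the relayed Airbyte messages and print them to stdout.
socat -d -d -d TCP-LISTEN:9000,bind=127.0.0.1 stdout
```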
--- .../connectors/source-always-works/.dockerignore | 1 + .../connectors/source-always-works/Dockerfile | 5 +++-- airbyte-integrations/connectors/source-always-works/socat.sh | 4 ++++ 3 files changed, 8 insertions(+), 2 deletions(-) create mode 100755 airbyte-integrations/connectors/source-always-works/socat.sh diff --git a/airbyte-integrations/connectors/source-always-works/.dockerignore b/airbyte-integrations/connectors/source-always-works/.dockerignore index cdada38cab87d..6118eac006098 100644 --- a/airbyte-integrations/connectors/source-always-works/.dockerignore +++ b/airbyte-integrations/connectors/source-always-works/.dockerignore @@ -5,3 +5,4 @@ !source_always_works !setup.py !secrets +!socat.sh diff --git a/airbyte-integrations/connectors/source-always-works/Dockerfile b/airbyte-integrations/connectors/source-always-works/Dockerfile index 1dcb4a16ccac3..c46f8c286afe1 100644 --- a/airbyte-integrations/connectors/source-always-works/Dockerfile +++ b/airbyte-integrations/connectors/source-always-works/Dockerfile @@ -1,15 +1,16 @@ FROM python:3.7-slim # Bash is installed for more convenient debugging. -RUN apt-get update && apt-get install -y bash && rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get install -y bash socat && rm -rf /var/lib/apt/lists/* WORKDIR /airbyte/integration_code COPY source_always_works ./source_always_works COPY main.py ./ COPY setup.py ./ +COPY socat.sh ./ RUN pip install . -ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] +ENTRYPOINT ["./socat.sh"] LABEL io.airbyte.version=0.1.0 LABEL io.airbyte.name=airbyte/source-always-works diff --git a/airbyte-integrations/connectors/source-always-works/socat.sh b/airbyte-integrations/connectors/source-always-works/socat.sh new file mode 100755 index 0000000000000..070f8cef788e0 --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/socat.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash + +echo "$@" +python "/airbyte/integration_code/main.py" "$@" | socat -d -d -d - TCP:host.docker.internal:9000 From 1ee5c5a945e92808168f08a771952493140ecb30 Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Tue, 18 May 2021 17:23:31 +0800 Subject: [PATCH 03/34] Override the entry point. --- .../connectors/source-always-works/Dockerfile | 2 +- .../connectors/source-always-works/shim-docker/Dockerfile | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 airbyte-integrations/connectors/source-always-works/shim-docker/Dockerfile diff --git a/airbyte-integrations/connectors/source-always-works/Dockerfile b/airbyte-integrations/connectors/source-always-works/Dockerfile index c46f8c286afe1..0847eacb1f883 100644 --- a/airbyte-integrations/connectors/source-always-works/Dockerfile +++ b/airbyte-integrations/connectors/source-always-works/Dockerfile @@ -10,7 +10,7 @@ COPY setup.py ./ COPY socat.sh ./ RUN pip install . 
-ENTRYPOINT ["./socat.sh"] +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] LABEL io.airbyte.version=0.1.0 LABEL io.airbyte.name=airbyte/source-always-works diff --git a/airbyte-integrations/connectors/source-always-works/shim-docker/Dockerfile b/airbyte-integrations/connectors/source-always-works/shim-docker/Dockerfile new file mode 100644 index 0000000000000..6bcdcf44b8956 --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/shim-docker/Dockerfile @@ -0,0 +1,8 @@ +FROM airbyte/source-always-works:dev + +WORKDIR /airbyte/integration_code + +ENTRYPOINT ["./socat.sh"] + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/source-always-works-socat-shim From 001b4a79f0257015731cf5f2be3ac500ea7b0406 Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Tue, 18 May 2021 17:31:30 +0800 Subject: [PATCH 04/34] Clean up and add ReadMe. --- .../source-always-works/docker-shim-mvp/README.md | 13 +++++++++++++ .../docker-shim-base}/Dockerfile | 1 + 2 files changed, 14 insertions(+) create mode 100644 airbyte-integrations/connectors/source-always-works/docker-shim-mvp/README.md rename airbyte-integrations/connectors/source-always-works/{shim-docker => docker-shim-mvp/docker-shim-base}/Dockerfile (69%) diff --git a/airbyte-integrations/connectors/source-always-works/docker-shim-mvp/README.md b/airbyte-integrations/connectors/source-always-works/docker-shim-mvp/README.md new file mode 100644 index 0000000000000..c1e080a686f5d --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/docker-shim-mvp/README.md @@ -0,0 +1,13 @@ +## Introduction +MVP for shim over our current entrypoint using `socat`. + +Build the `Dockerfile` in the `docker-shim-base`. + +Note: This only handles the simpler CDK entrypoint now. This does not handle the more complex +Java entrypoints. + +Run as such: +`docker run -it --rm --network=host -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json` + +Listen on localhost: +`socat -d -d -d TCP-LISTEN:9000,bind=127.0.0.1 stdout` diff --git a/airbyte-integrations/connectors/source-always-works/shim-docker/Dockerfile b/airbyte-integrations/connectors/source-always-works/docker-shim-mvp/docker-shim-base/Dockerfile similarity index 69% rename from airbyte-integrations/connectors/source-always-works/shim-docker/Dockerfile rename to airbyte-integrations/connectors/source-always-works/docker-shim-mvp/docker-shim-base/Dockerfile index 6bcdcf44b8956..f8ae0235285e7 100644 --- a/airbyte-integrations/connectors/source-always-works/shim-docker/Dockerfile +++ b/airbyte-integrations/connectors/source-always-works/docker-shim-mvp/docker-shim-base/Dockerfile @@ -1,3 +1,4 @@ +# Replace this with an image of any Python source that uses the new CDK entrypoint. FROM airbyte/source-always-works:dev WORKDIR /airbyte/integration_code From b90ec7101cd12aed538e32be7fa856fd7c7d3fb6 Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Tue, 18 May 2021 19:19:18 +0800 Subject: [PATCH 05/34] Clean up socat.
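With the relay split out, the source side now reads `SRC_IP`/`SRC_PORT` and the new `destination-listen-and-echo` image listens on `DEST_IP`/`DEST_PORT`. One way to exercise the pair locally (a sketch; the image tags depend on how you build them, and `--network=host` assumes a Linux host):

```
# Terminal 1 - destination: listen on port 9000 and echo whatever arrives.
docker run --rm --network=host -e DEST_IP=0.0.0.0 -e DEST_PORT=9000 airbyte/destination-listen-and-echo:dev

# Terminal 2 - source: run the connector through the socat shim, pointed at the listener.
docker run --rm --network=host -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests \
  -e SRC_IP=127.0.0.1 -e SRC_PORT=9000 airbyte/docker-shim-base:dev \
  read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json
```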
--- .../connectors/source-always-works/.dockerignore | 1 - .../connectors/source-always-works/Dockerfile | 1 - .../destination-listen-and-echo/Dockerfile | 11 +++++++++++ .../destination-listen-and-echo/listen-and-echo.sh | 4 ++++ .../docker-shim-mvp/docker-shim-base/Dockerfile | 2 ++ .../{ => docker-shim-mvp/docker-shim-base}/socat.sh | 3 ++- 6 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 airbyte-integrations/connectors/source-always-works/docker-shim-mvp/destination-listen-and-echo/Dockerfile create mode 100755 airbyte-integrations/connectors/source-always-works/docker-shim-mvp/destination-listen-and-echo/listen-and-echo.sh rename airbyte-integrations/connectors/source-always-works/{ => docker-shim-mvp/docker-shim-base}/socat.sh (57%) diff --git a/airbyte-integrations/connectors/source-always-works/.dockerignore b/airbyte-integrations/connectors/source-always-works/.dockerignore index 6118eac006098..cdada38cab87d 100644 --- a/airbyte-integrations/connectors/source-always-works/.dockerignore +++ b/airbyte-integrations/connectors/source-always-works/.dockerignore @@ -5,4 +5,3 @@ !source_always_works !setup.py !secrets -!socat.sh diff --git a/airbyte-integrations/connectors/source-always-works/Dockerfile b/airbyte-integrations/connectors/source-always-works/Dockerfile index 0847eacb1f883..52f9db22a3d1a 100644 --- a/airbyte-integrations/connectors/source-always-works/Dockerfile +++ b/airbyte-integrations/connectors/source-always-works/Dockerfile @@ -7,7 +7,6 @@ WORKDIR /airbyte/integration_code COPY source_always_works ./source_always_works COPY main.py ./ COPY setup.py ./ -COPY socat.sh ./ RUN pip install . ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] diff --git a/airbyte-integrations/connectors/source-always-works/docker-shim-mvp/destination-listen-and-echo/Dockerfile b/airbyte-integrations/connectors/source-always-works/docker-shim-mvp/destination-listen-and-echo/Dockerfile new file mode 100644 index 0000000000000..f166afd62d0ba --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/docker-shim-mvp/destination-listen-and-echo/Dockerfile @@ -0,0 +1,11 @@ +# Use the same java base to simulate a destination image. 
+#FROM airbyte/integration-base-java:dev +FROM debian:latest + +RUN apt-get update && apt-get install -y bash socat && rm -rf /var/lib/apt/lists/* + +WORKDIR /airbyte/integration_code + +COPY listen-and-echo.sh ./ + +ENTRYPOINT ["./listen-and-echo.sh"] diff --git a/airbyte-integrations/connectors/source-always-works/docker-shim-mvp/destination-listen-and-echo/listen-and-echo.sh b/airbyte-integrations/connectors/source-always-works/docker-shim-mvp/destination-listen-and-echo/listen-and-echo.sh new file mode 100755 index 0000000000000..5ae5c03c2aeeb --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/docker-shim-mvp/destination-listen-and-echo/listen-and-echo.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash + +echo "ip: ${DEST_IP}", "port: ${DEST_PORT}" +socat -d -d -d TCP-LISTEN:"${DEST_PORT}",bind="${DEST_IP}" stdout diff --git a/airbyte-integrations/connectors/source-always-works/docker-shim-mvp/docker-shim-base/Dockerfile b/airbyte-integrations/connectors/source-always-works/docker-shim-mvp/docker-shim-base/Dockerfile index f8ae0235285e7..bf595b601a359 100644 --- a/airbyte-integrations/connectors/source-always-works/docker-shim-mvp/docker-shim-base/Dockerfile +++ b/airbyte-integrations/connectors/source-always-works/docker-shim-mvp/docker-shim-base/Dockerfile @@ -3,6 +3,8 @@ FROM airbyte/source-always-works:dev WORKDIR /airbyte/integration_code +COPY socat.sh ./ + ENTRYPOINT ["./socat.sh"] LABEL io.airbyte.version=0.1.0 diff --git a/airbyte-integrations/connectors/source-always-works/socat.sh b/airbyte-integrations/connectors/source-always-works/docker-shim-mvp/docker-shim-base/socat.sh similarity index 57% rename from airbyte-integrations/connectors/source-always-works/socat.sh rename to airbyte-integrations/connectors/source-always-works/docker-shim-mvp/docker-shim-base/socat.sh index 070f8cef788e0..a6409ef94340c 100755 --- a/airbyte-integrations/connectors/source-always-works/socat.sh +++ b/airbyte-integrations/connectors/source-always-works/docker-shim-mvp/docker-shim-base/socat.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +echo "ip: ${SRC_IP}", "port: ${SRC_PORT}" echo "$@" -python "/airbyte/integration_code/main.py" "$@" | socat -d -d -d - TCP:host.docker.internal:9000 +python "/airbyte/integration_code/main.py" "$@" | socat -d -d -d - TCP:${SRC_IP}:${SRC_PORT} From 1fb1b84bd10870a25a46aac44e77dff9ff84cd36 Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Wed, 19 May 2021 16:19:19 +0800 Subject: [PATCH 06/34] Checkpoint: connect to Kube cluster and list all the pods. 
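For reference, the `main()` added here does programmatically what the following does from a terminal pointed at the same cluster (an illustrative equivalent, not part of the build):

```
kubectl get pods --all-namespaces
```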
--- airbyte-workers/build.gradle | 1 + .../process/KubeProcessBuilderFactory.java | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/airbyte-workers/build.gradle b/airbyte-workers/build.gradle index 255ccc8a46b71..b550fb24f6d46 100644 --- a/airbyte-workers/build.gradle +++ b/airbyte-workers/build.gradle @@ -17,6 +17,7 @@ dependencies { implementation 'io.kubernetes:client-java-extended:10.0.0' implementation 'io.temporal:temporal-sdk:1.0.4' implementation 'org.apache.ant:ant:1.10.10' + implementation 'io.kubernetes:client-java:10.0.0' implementation project(':airbyte-config:models') implementation project(':airbyte-db') diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java index 83af111cc8c52..5bbc29763af8b 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java @@ -32,6 +32,14 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.commons.resources.MoreResources; import io.airbyte.workers.WorkerException; +import io.kubernetes.client.openapi.ApiClient; +import io.kubernetes.client.openapi.ApiException; +import io.kubernetes.client.openapi.Configuration; +import io.kubernetes.client.openapi.apis.CoreV1Api; +import io.kubernetes.client.openapi.models.V1Pod; +import io.kubernetes.client.openapi.models.V1PodList; +import io.kubernetes.client.util.Config; +import java.io.IOException; import java.nio.file.Path; import java.util.Arrays; import java.util.List; @@ -96,4 +104,15 @@ public ProcessBuilder create(String jobId, int attempt, final Path jobRoot, fina } } + public static void main(String[] args) throws IOException, ApiException { + ApiClient client = Config.defaultClient(); + Configuration.setDefaultApiClient(client); + + CoreV1Api api = new CoreV1Api(); + V1PodList list = api.listPodForAllNamespaces(null, null, null, null, null, null, null, null, null); + for (V1Pod item : list.getItems()) { + LOGGER.info(item.getMetadata().getName()); + } + } + } From bd3f17fb966e81fa6615972877f1f90030152dce Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Wed, 19 May 2021 21:41:18 +0800 Subject: [PATCH 07/34] Checkpoint: Sync worker pod is able to send output to the destination pod. 
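One way to reproduce this checkpoint by hand with the manifests added below (pod and file names come straight from the YAML; assumes `kubectl` points at the target cluster and is run from the resources directory):

```
# Grant the default service account read access to pods, then start both pods.
kubectl apply -f default_service_account_perms.yaml
kubectl apply -f destination-listen-and-echo.yaml
kubectl apply -f kube-sync-workers.yaml

# The worker looks up the destination pod's IP and writes "Hello!" to it over TCP;
# the destination's socat listener should log whatever it receives.
kubectl logs -f destination-listen-and-echo
```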
--- airbyte-workers/Dockerfile | 12 ++++++ airbyte-workers/build.gradle | 9 ++++- .../process/KubeProcessBuilderFactory.java | 40 +++++++++++++++---- .../default_service_account_perms.yaml | 30 ++++++++++++++ .../destination-listen-and-echo.yaml | 15 +++++++ .../src/main/resources/kube-sync-workers.yaml | 9 +++++ 6 files changed, 106 insertions(+), 9 deletions(-) create mode 100644 airbyte-workers/Dockerfile create mode 100644 airbyte-workers/src/main/resources/default_service_account_perms.yaml create mode 100644 airbyte-workers/src/main/resources/destination-listen-and-echo.yaml create mode 100644 airbyte-workers/src/main/resources/kube-sync-workers.yaml diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile new file mode 100644 index 0000000000000..bfb4eda8b18e2 --- /dev/null +++ b/airbyte-workers/Dockerfile @@ -0,0 +1,12 @@ +FROM openjdk:14.0.2-slim + +WORKDIR /airbyte + +COPY build/distributions/airbyte-workers*.tar airbyte-workers.tar + +RUN tar xf airbyte-workers.tar --strip-components=1 + +ENTRYPOINT ["./bin/airbyte-workers"] + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/kube-sync-worker-test diff --git a/airbyte-workers/build.gradle b/airbyte-workers/build.gradle index b550fb24f6d46..c8f27bfa3026c 100644 --- a/airbyte-workers/build.gradle +++ b/airbyte-workers/build.gradle @@ -1,8 +1,13 @@ import org.jsonschema2pojo.SourceType plugins { - id 'java-library' + id 'application' id 'com.github.eirnym.js2p' version '1.0' + id 'airbyte-docker' +} + +application { + mainClass = 'io.airbyte.workers.process.KubeProcessBuilderFactory' } configurations { @@ -17,7 +22,7 @@ dependencies { implementation 'io.kubernetes:client-java-extended:10.0.0' implementation 'io.temporal:temporal-sdk:1.0.4' implementation 'org.apache.ant:ant:1.10.10' - implementation 'io.kubernetes:client-java:10.0.0' + implementation 'io.fabric8:kubernetes-client:5.3.1' implementation project(':airbyte-config:models') implementation project(':airbyte-db') diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java index 5bbc29763af8b..ac822f7a625ba 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java @@ -32,6 +32,10 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.commons.resources.MoreResources; import io.airbyte.workers.WorkerException; +import io.fabric8.kubernetes.api.model.Pod; +import io.fabric8.kubernetes.api.model.PodList; +import io.fabric8.kubernetes.client.DefaultKubernetesClient; +import io.fabric8.kubernetes.client.KubernetesClient; import io.kubernetes.client.openapi.ApiClient; import io.kubernetes.client.openapi.ApiException; import io.kubernetes.client.openapi.Configuration; @@ -39,7 +43,11 @@ import io.kubernetes.client.openapi.models.V1Pod; import io.kubernetes.client.openapi.models.V1PodList; import io.kubernetes.client.util.Config; +import java.io.BufferedReader; import java.io.IOException; +import java.io.InputStreamReader; +import java.io.PrintWriter; +import java.net.Socket; import java.nio.file.Path; import java.util.Arrays; import java.util.List; @@ -105,14 +113,32 @@ public ProcessBuilder create(String jobId, int attempt, final Path jobRoot, fina } public static void main(String[] args) throws IOException, ApiException { - ApiClient client = Config.defaultClient(); - 
Configuration.setDefaultApiClient(client); - - CoreV1Api api = new CoreV1Api(); - V1PodList list = api.listPodForAllNamespaces(null, null, null, null, null, null, null, null, null); - for (V1Pod item : list.getItems()) { - LOGGER.info(item.getMetadata().getName()); + var PORT = 9000; + String IP = null; + KubernetesClient client = new DefaultKubernetesClient(); + // TODO: Assign labels to pods to narrow the search. + PodList pods = client.pods().inNamespace("default").list(); + for (Pod p : pods.getItems()) { + // TODO: filter for the actual pod. + // retrieve the ip + LOGGER.info(p.getMetadata().getName()); + LOGGER.info(p.getStatus().getPodIP()); + if (p.getMetadata().getName().equals("destination-listen-and-echo")) { + LOGGER.info("Found IP!"); + IP = p.getStatus().getPodIP(); + break; + } } + + client.close(); + + // TODO: create the pod + var clientSocket = new Socket(IP, PORT); + var out = new PrintWriter(clientSocket.getOutputStream(), true); + out.print("Hello!"); + out.close(); + // try and connect to this ip and send something + } } diff --git a/airbyte-workers/src/main/resources/default_service_account_perms.yaml b/airbyte-workers/src/main/resources/default_service_account_perms.yaml new file mode 100644 index 0000000000000..061f13a9ba302 --- /dev/null +++ b/airbyte-workers/src/main/resources/default_service_account_perms.yaml @@ -0,0 +1,30 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + creationTimestamp: null + name: default-role + namespace: default +rules: + - apiGroups: + - "" + resources: + - services + - pods + verbs: + - get + - list +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + creationTimestamp: null + name: default-sa-binding + namespace: default +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: default-role +subjects: + - kind: ServiceAccount + name: default + namespace: default diff --git a/airbyte-workers/src/main/resources/destination-listen-and-echo.yaml b/airbyte-workers/src/main/resources/destination-listen-and-echo.yaml new file mode 100644 index 0000000000000..84c784727b54a --- /dev/null +++ b/airbyte-workers/src/main/resources/destination-listen-and-echo.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Pod +metadata: + name: destination-listen-and-echo +spec: + containers: + - name: destination-listen-and-echo + image: airbyte/destination-listen-and-echo:dev + ports: + - containerPort: 9000 + env: + - name: DEST_PORT + value: '9000' + - name: DEST_IP + value: '0.0.0.0' diff --git a/airbyte-workers/src/main/resources/kube-sync-workers.yaml b/airbyte-workers/src/main/resources/kube-sync-workers.yaml new file mode 100644 index 0000000000000..e11c99836b504 --- /dev/null +++ b/airbyte-workers/src/main/resources/kube-sync-workers.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: Pod +metadata: + name: kube-sync-worker-test +spec: + containers: + - name: kube-sync-worker-test + image: airbyte/kube-sync-worker-test:dev + imagePullPolicy: Always From f9a6c9b94a1d4e90d2bae300e9ec1b4d15f9496c Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Wed, 19 May 2021 23:11:12 +0800 Subject: [PATCH 08/34] Checkpoint: Sync worker creates Dest pod if none existed previously. It also waits for the pod to be ready before doing anything else. Sync worker will also remove the pod on termination. 
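The readiness wait and cleanup added here can be sanity-checked from outside the worker (pod name from `destination-listen-and-echo.yaml`; the 60s timeout mirrors the one-minute `waitUntilReady` call below):

```
# While the worker runs: wait for the destination pod it creates to become Ready.
kubectl wait --for=condition=Ready pod/destination-listen-and-echo --timeout=60s

# After the worker terminates: the pod should be gone again.
kubectl get pod destination-listen-and-echo
```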
--- .../process/KubeProcessBuilderFactory.java | 47 +++++++++++++++---- .../default_service_account_perms.yaml | 4 ++ .../destination-listen-and-echo.yaml | 1 + .../src/main/resources/kube-sync-workers.yaml | 1 + 4 files changed, 45 insertions(+), 8 deletions(-) diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java index ac822f7a625ba..35039581843c0 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java @@ -33,7 +33,9 @@ import io.airbyte.commons.resources.MoreResources; import io.airbyte.workers.WorkerException; import io.fabric8.kubernetes.api.model.Pod; +import io.fabric8.kubernetes.api.model.PodBuilder; import io.fabric8.kubernetes.api.model.PodList; +import io.fabric8.kubernetes.api.model.PodSpec; import io.fabric8.kubernetes.client.DefaultKubernetesClient; import io.fabric8.kubernetes.client.KubernetesClient; import io.kubernetes.client.openapi.ApiClient; @@ -48,9 +50,13 @@ import java.io.InputStreamReader; import java.io.PrintWriter; import java.net.Socket; +import java.net.URL; import java.nio.file.Path; import java.util.Arrays; import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.function.Predicate; +import java.util.stream.Collectors; import org.apache.commons.lang3.RandomStringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -112,33 +118,58 @@ public ProcessBuilder create(String jobId, int attempt, final Path jobRoot, fina } } - public static void main(String[] args) throws IOException, ApiException { + public static void main(String[] args) throws IOException, ApiException, InterruptedException { + // TODO: This pod sometimes errors once on start up. Why? var PORT = 9000; String IP = null; + var destPodName = "destination-listen-and-echo"; KubernetesClient client = new DefaultKubernetesClient(); + + // Load spec and create the pod. + var stream = KubeProcessBuilderFactory.class.getClassLoader().getResourceAsStream("destination-listen-and-echo.yaml"); + var destPodDef = client.pods().load(stream).get(); + LOGGER.info("Loaded spec: {}", destPodDef); + + var podSet = client.pods().inNamespace("default").list().getItems().stream() + .filter(pod -> pod.getMetadata().getName().equals(destPodName)).collect(Collectors.toSet()); + if (podSet.size() == 0) { + LOGGER.info("Pod does not exist"); + Pod destPod = client.pods().create(destPodDef); + LOGGER.info("Created pod: {}, waiting for it to be ready", destPod); + client.resource(destPod).waitUntilReady(1, TimeUnit.MINUTES); + LOGGER.info("Dest Pod ready"); + } + + // TODO: Why does this not work? + // LOGGER.info(destPod.getStatus().getPodIP()); + // destPod = client.resource(destPod).get(); + // LOGGER.info("Status: {}", destPod.getStatus()); + // LOGGER.info("IP: {}", destPod.getStatus().getPodIP()); + // IP = destPod.getStatus().getPodIP(); + // TODO: Assign labels to pods to narrow the search. PodList pods = client.pods().inNamespace("default").list(); for (Pod p : pods.getItems()) { - // TODO: filter for the actual pod. - // retrieve the ip LOGGER.info(p.getMetadata().getName()); LOGGER.info(p.getStatus().getPodIP()); - if (p.getMetadata().getName().equals("destination-listen-and-echo")) { + // Filter by pod and retrieve IP. 
+ if (p.getMetadata().getName().equals(destPodName)) { LOGGER.info("Found IP!"); IP = p.getStatus().getPodIP(); break; } } - client.close(); - - // TODO: create the pod + // Send something! var clientSocket = new Socket(IP, PORT); var out = new PrintWriter(clientSocket.getOutputStream(), true); out.print("Hello!"); out.close(); - // try and connect to this ip and send something + client.pods().delete(destPodDef); + // TODO: Why does this wait not work? + client.resource(destPodDef).waitUntilCondition(pod -> !pod.getStatus().getPhase().equals("Terminating"), 1, TimeUnit.MINUTES); + client.close(); } } diff --git a/airbyte-workers/src/main/resources/default_service_account_perms.yaml b/airbyte-workers/src/main/resources/default_service_account_perms.yaml index 061f13a9ba302..776010ddfb45f 100644 --- a/airbyte-workers/src/main/resources/default_service_account_perms.yaml +++ b/airbyte-workers/src/main/resources/default_service_account_perms.yaml @@ -13,6 +13,10 @@ rules: verbs: - get - list + - create + - update + - delete + - watch --- apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding diff --git a/airbyte-workers/src/main/resources/destination-listen-and-echo.yaml b/airbyte-workers/src/main/resources/destination-listen-and-echo.yaml index 84c784727b54a..fcd6f1bb50d26 100644 --- a/airbyte-workers/src/main/resources/destination-listen-and-echo.yaml +++ b/airbyte-workers/src/main/resources/destination-listen-and-echo.yaml @@ -2,6 +2,7 @@ apiVersion: v1 kind: Pod metadata: name: destination-listen-and-echo + namespace: default spec: containers: - name: destination-listen-and-echo diff --git a/airbyte-workers/src/main/resources/kube-sync-workers.yaml b/airbyte-workers/src/main/resources/kube-sync-workers.yaml index e11c99836b504..873c94e4ecc74 100644 --- a/airbyte-workers/src/main/resources/kube-sync-workers.yaml +++ b/airbyte-workers/src/main/resources/kube-sync-workers.yaml @@ -3,6 +3,7 @@ kind: Pod metadata: name: kube-sync-worker-test spec: + restartPolicy: Never containers: - name: kube-sync-worker-test image: airbyte/kube-sync-worker-test:dev From 921af611641cf11e5ffa3dda17fe1cea80b556c5 Mon Sep 17 00:00:00 2001 From: jrhizor Date: Wed, 19 May 2021 08:20:44 -0700 Subject: [PATCH 09/34] update readme --- .../source-always-works/docker-shim-mvp/README.md | 13 ++++++++----- .../source_always_works/source.py | 8 ++++---- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/airbyte-integrations/connectors/source-always-works/docker-shim-mvp/README.md b/airbyte-integrations/connectors/source-always-works/docker-shim-mvp/README.md index c1e080a686f5d..ab6ba11b53b09 100644 --- a/airbyte-integrations/connectors/source-always-works/docker-shim-mvp/README.md +++ b/airbyte-integrations/connectors/source-always-works/docker-shim-mvp/README.md @@ -1,13 +1,16 @@ ## Introduction MVP for shim over our current entrypoint using `socat`. -Build the `Dockerfile` in the `docker-shim-base`. +1. From the `~/code/airbyte` directory run `./gradlew :airbyte-integrations:connectors:source-always-works:airbyteDocker` + +2. From `~/code/airbyte/airbyte-integrations/connectors/source-always-works/docker-shim-mvp/docker-shim-base` run `docker build -t airbyte/docker-shim-base:dev .` Note: This only handle the simpler CDK entrypoint now. This does not handle the more complex Java entrypoints. 
-Run as such: -`docker run -it --rm --network=host -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json` +3. Run `echo "{\"limit\": 1000}" > ~/code/airbyte/airbyte-integrations/connectors/source-always-works/secrets/config.json` + +4. Listen on localhost: + `socat -d -d -d TCP-LISTEN:9000,bind=127.0.0.1 stdout` -Listen on localhost: -`socat -d -d -d TCP-LISTEN:9000,bind=127.0.0.1 stdout` +5. From `~/code/airbyte/airbyte-integrations/connectors/source-always-works`, run `docker run -it --rm --network=host -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests -e SRC_IP=host.docker.internal -e SRC_PORT=9000 airbyte/docker-shim-base:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json` diff --git a/airbyte-integrations/connectors/source-always-works/source_always_works/source.py b/airbyte-integrations/connectors/source-always-works/source_always_works/source.py index 8f83cee637ecf..0a5b81b1706ec 100644 --- a/airbyte-integrations/connectors/source-always-works/source_always_works/source.py +++ b/airbyte-integrations/connectors/source-always-works/source_always_works/source.py @@ -43,9 +43,9 @@ def __init__(self, limit: int): self._limit = limit # Ignored Functions - def path(self, stream_state: Mapping[str, Any] = None, - stream_slice: Mapping[str, Any] = None, - next_page_token: Mapping[str, Any] = None) -> str: + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: return "unused" def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: @@ -98,4 +98,4 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: """ :param config: A Mapping of the user input configuration as defined in the connector spec. """ - return [AlwaysWorksStream(config['limit'])] + return [AlwaysWorksStream(config["limit"])] From c01a3863c730d4938f2de78786857e5a2d89b66f Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Wed, 19 May 2021 23:23:35 +0800 Subject: [PATCH 10/34] Checkpoint: Dest pod does nott restart after finishing. Comment out delete command in Sync worker. --- .../workers/process/KubeProcessBuilderFactory.java | 9 ++++----- .../src/main/resources/destination-listen-and-echo.yaml | 1 + 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java index 35039581843c0..ea27fd266c614 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java @@ -119,7 +119,6 @@ public ProcessBuilder create(String jobId, int attempt, final Path jobRoot, fina } public static void main(String[] args) throws IOException, ApiException, InterruptedException { - // TODO: This pod sometimes errors once on start up. Why? 
var PORT = 9000; String IP = null; var destPodName = "destination-listen-and-echo"; @@ -134,7 +133,7 @@ public static void main(String[] args) throws IOException, ApiException, Interru .filter(pod -> pod.getMetadata().getName().equals(destPodName)).collect(Collectors.toSet()); if (podSet.size() == 0) { LOGGER.info("Pod does not exist"); - Pod destPod = client.pods().create(destPodDef); + Pod destPod = client.pods().create(destPodDef); // watch command? LOGGER.info("Created pod: {}, waiting for it to be ready", destPod); client.resource(destPod).waitUntilReady(1, TimeUnit.MINUTES); LOGGER.info("Dest Pod ready"); @@ -166,9 +165,9 @@ public static void main(String[] args) throws IOException, ApiException, Interru out.print("Hello!"); out.close(); - client.pods().delete(destPodDef); - // TODO: Why does this wait not work? - client.resource(destPodDef).waitUntilCondition(pod -> !pod.getStatus().getPhase().equals("Terminating"), 1, TimeUnit.MINUTES); +// client.pods().delete(destPodDef); +// // TODO: Why does this wait not work? +// client.resource(destPodDef).waitUntilCondition(pod -> !pod.getStatus().getPhase().equals("Terminating"), 1, TimeUnit.MINUTES); client.close(); } diff --git a/airbyte-workers/src/main/resources/destination-listen-and-echo.yaml b/airbyte-workers/src/main/resources/destination-listen-and-echo.yaml index fcd6f1bb50d26..5cb14ce527daf 100644 --- a/airbyte-workers/src/main/resources/destination-listen-and-echo.yaml +++ b/airbyte-workers/src/main/resources/destination-listen-and-echo.yaml @@ -4,6 +4,7 @@ metadata: name: destination-listen-and-echo namespace: default spec: + restartPolicy: Never containers: - name: destination-listen-and-echo image: airbyte/destination-listen-and-echo:dev From 435e9db65c0e24dad54ad5c6fdad289e1afa7a2d Mon Sep 17 00:00:00 2001 From: jrhizor Date: Wed, 19 May 2021 16:58:11 -0700 Subject: [PATCH 11/34] working towards named pipes --- .../source-always-works/named_pipes/README.md | 8 +++++++ .../named_pipes/np_dest/Dockerfile | 3 +++ .../named_pipes/np_dest/run.sh | 5 ++++ .../named_pipes/np_source/Dockerfile | 3 +++ .../named_pipes/np_source/run.sh | 6 +++++ .../process/KubeProcessBuilderFactory.java | 24 ++++--------------- 6 files changed, 30 insertions(+), 19 deletions(-) create mode 100644 airbyte-integrations/connectors/source-always-works/named_pipes/README.md create mode 100644 airbyte-integrations/connectors/source-always-works/named_pipes/np_dest/Dockerfile create mode 100755 airbyte-integrations/connectors/source-always-works/named_pipes/np_dest/run.sh create mode 100644 airbyte-integrations/connectors/source-always-works/named_pipes/np_source/Dockerfile create mode 100755 airbyte-integrations/connectors/source-always-works/named_pipes/np_source/run.sh diff --git a/airbyte-integrations/connectors/source-always-works/named_pipes/README.md b/airbyte-integrations/connectors/source-always-works/named_pipes/README.md new file mode 100644 index 0000000000000..247301298571b --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/named_pipes/README.md @@ -0,0 +1,8 @@ +Try to use a mounted named pipe instead of using socat. 
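For orientation, the consumer role that `np_dest` plays in the shell commands below can also be sketched in Java from the worker's point of view; a minimal, hypothetical example, assuming the pipe is mounted at `/tmp/np_file/pipe` as in those commands.

```
import java.io.BufferedReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;

// Hypothetical reader playing the np_dest role: opening the FIFO blocks until a
// writer attaches, then readLine() yields each message until the writer closes it.
public class NamedPipeReader {

  public static void main(String[] args) throws Exception {
    Path pipe = Path.of("/tmp/np_file/pipe"); // assumed mount path from the demo commands
    try (BufferedReader reader = Files.newBufferedReader(pipe, StandardCharsets.UTF_8)) {
      String line;
      while ((line = reader.readLine()) != null) { // null once the source reaches EOF
        System.out.println("received " + line);
      }
    }
  }
}
```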
+``` +mkdir /tmp/np_file +docker run -it --rm -v /tmp/np_file:/tmp/np_file --entrypoint "sh" np_source:dev -c "mkfifo /tmp/np_file/pipe && /tmp/run.sh > /tmp/np_file/pipe && rm /tmp/np_file/pipe" | docker run -i --rm -v /tmp/np_file:/tmp/np_file --entrypoint "sh" np_dest:dev -c "tail -f /tmp/np_file/pipe | /tmp/run.sh" +``` + + +docker run -it --rm -v /tmp/np_file:/tmp/np_file --entrypoint "sh" np_source:dev -c "rm /tmp/np_file/pipe && mkfifo /tmp/np_file/pipe && /tmp/run.sh > /tmp/np_file/pipe" | docker run -i --rm -v /tmp/np_file:/tmp/np_file --entrypoint "sh" np_dest:dev -c "tail -f /tmp/np_file/pipe | /tmp/run.sh" \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-always-works/named_pipes/np_dest/Dockerfile b/airbyte-integrations/connectors/source-always-works/named_pipes/np_dest/Dockerfile new file mode 100644 index 0000000000000..9e899d21f975b --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/named_pipes/np_dest/Dockerfile @@ -0,0 +1,3 @@ +FROM debian:latest +COPY run.sh /tmp/run.sh +ENTRYPOINT /tmp/run.sh diff --git a/airbyte-integrations/connectors/source-always-works/named_pipes/np_dest/run.sh b/airbyte-integrations/connectors/source-always-works/named_pipes/np_dest/run.sh new file mode 100755 index 0000000000000..57b4f9695dfe6 --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/named_pipes/np_dest/run.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +while IFS='$\n' read -r line; do + echo "received $line" +done diff --git a/airbyte-integrations/connectors/source-always-works/named_pipes/np_source/Dockerfile b/airbyte-integrations/connectors/source-always-works/named_pipes/np_source/Dockerfile new file mode 100644 index 0000000000000..85b78a910a187 --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/named_pipes/np_source/Dockerfile @@ -0,0 +1,3 @@ +FROM alpine:latest +COPY run.sh /tmp/run.sh +ENTRYPOINT /tmp/run.sh diff --git a/airbyte-integrations/connectors/source-always-works/named_pipes/np_source/run.sh b/airbyte-integrations/connectors/source-always-works/named_pipes/np_source/run.sh new file mode 100755 index 0000000000000..860117ed5cd9d --- /dev/null +++ b/airbyte-integrations/connectors/source-always-works/named_pipes/np_source/run.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +echo "{\"msg\": \"one\"}" +echo "{\"msg\": \"two\"}" +echo "{\"msg\": \"three\"}" +echo "{\"msg\": \"four\"}" diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java index 35039581843c0..e49744d45a53f 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java @@ -140,25 +140,11 @@ public static void main(String[] args) throws IOException, ApiException, Interru LOGGER.info("Dest Pod ready"); } - // TODO: Why does this not work? - // LOGGER.info(destPod.getStatus().getPodIP()); - // destPod = client.resource(destPod).get(); - // LOGGER.info("Status: {}", destPod.getStatus()); - // LOGGER.info("IP: {}", destPod.getStatus().getPodIP()); - // IP = destPod.getStatus().getPodIP(); - - // TODO: Assign labels to pods to narrow the search. - PodList pods = client.pods().inNamespace("default").list(); - for (Pod p : pods.getItems()) { - LOGGER.info(p.getMetadata().getName()); - LOGGER.info(p.getStatus().getPodIP()); - // Filter by pod and retrieve IP. 
- if (p.getMetadata().getName().equals(destPodName)) { - LOGGER.info("Found IP!"); - IP = p.getStatus().getPodIP(); - break; - } - } + Pod destPod = client.pods().inNamespace("default").withName(destPodName).get(); + LOGGER.info("Found IP!"); + LOGGER.info("Status: {}", destPod.getStatus()); + LOGGER.info("IP: {}", destPod.getStatus().getPodIP()); + IP = destPod.getStatus().getPodIP(); // Send something! var clientSocket = new Socket(IP, PORT); From b8138302623394afa290a7670a08fb1b7d9f80cc Mon Sep 17 00:00:00 2001 From: jrhizor Date: Thu, 20 May 2021 07:43:07 -0700 Subject: [PATCH 12/34] named pipes working --- .../source-always-works/named_pipes/README.md | 28 +++++++++++++++++-- .../named_pipes/np_source/run.sh | 11 ++++++++ 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/connectors/source-always-works/named_pipes/README.md b/airbyte-integrations/connectors/source-always-works/named_pipes/README.md index 247301298571b..9de676d28a38a 100644 --- a/airbyte-integrations/connectors/source-always-works/named_pipes/README.md +++ b/airbyte-integrations/connectors/source-always-works/named_pipes/README.md @@ -1,8 +1,30 @@ Try to use a mounted named pipe instead of using socat. + +For builds: +``` +cd ~/code/airbyte/airbyte-integrations/connectors/source-always-works/named_pipes/np_source +docker build -t np_source:dev . +cd ~/code/airbyte/airbyte-integrations/connectors/source-always-works/named_pipes/np_dest +docker build -t np_dest:dev . +``` + +Then run the "init container" equivalent: +``` +docker run -it --rm -v /tmp/np_file:/tmp/np_file --entrypoint "sh" np_source:dev -c "rm /tmp/np_file/pipe && mkfifo /tmp/np_file/pipe" +``` + +Then run the source and destination at the same time (you can do either first): ``` -mkdir /tmp/np_file -docker run -it --rm -v /tmp/np_file:/tmp/np_file --entrypoint "sh" np_source:dev -c "mkfifo /tmp/np_file/pipe && /tmp/run.sh > /tmp/np_file/pipe && rm /tmp/np_file/pipe" | docker run -i --rm -v /tmp/np_file:/tmp/np_file --entrypoint "sh" np_dest:dev -c "tail -f /tmp/np_file/pipe | /tmp/run.sh" +docker run -it --rm -v /tmp/np_file:/tmp/np_file --entrypoint "sh" np_source:dev -c "/tmp/run.sh > /tmp/np_file/pipe" +docker run -it --rm -v /tmp/np_file:/tmp/np_file --entrypoint "sh" np_dest:dev -c "cat /tmp/np_file/pipe | /tmp/run.sh" ``` +Notes: +- The source blocks until something starts reading from the named pipe. +- `cat` detects when the source reaches its EOF +- You can start the destination before the source and it terminates as expected. 
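A hedged sketch of why the first note holds: on Linux, opening a FIFO for writing blocks until a reader opens the other end, so a producer playing the `np_source` role stalls on the open call itself. The path and payloads here are illustrative assumptions.

```
import java.io.BufferedWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;

// Hypothetical producer playing the np_source role: newBufferedWriter blocks until
// some reader (e.g. `cat /tmp/np_file/pipe`) opens the pipe; closing it signals EOF.
public class NamedPipeWriter {

  public static void main(String[] args) throws Exception {
    Path pipe = Path.of("/tmp/np_file/pipe"); // assumed path; created beforehand with mkfifo
    try (BufferedWriter out = Files.newBufferedWriter(pipe, StandardCharsets.UTF_8)) {
      for (int i = 1; i <= 4; i++) {
        out.write("{\"msg\": \"" + i + "\"}");
        out.newLine();
        out.flush();
      }
    } // the reader sees EOF only after this writer is closed
  }
}
```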
-docker run -it --rm -v /tmp/np_file:/tmp/np_file --entrypoint "sh" np_source:dev -c "rm /tmp/np_file/pipe && mkfifo /tmp/np_file/pipe && /tmp/run.sh > /tmp/np_file/pipe" | docker run -i --rm -v /tmp/np_file:/tmp/np_file --entrypoint "sh" np_dest:dev -c "tail -f /tmp/np_file/pipe | /tmp/run.sh" \ No newline at end of file +Todo: +- Set this up for Kube +- Use an init container to create the initial named pipe +- Use a shared mount diff --git a/airbyte-integrations/connectors/source-always-works/named_pipes/np_source/run.sh b/airbyte-integrations/connectors/source-always-works/named_pipes/np_source/run.sh index 860117ed5cd9d..fe2661b222e8a 100755 --- a/airbyte-integrations/connectors/source-always-works/named_pipes/np_source/run.sh +++ b/airbyte-integrations/connectors/source-always-works/named_pipes/np_source/run.sh @@ -1,6 +1,17 @@ #!/bin/sh echo "{\"msg\": \"one\"}" +sleep 1 echo "{\"msg\": \"two\"}" +sleep 1 echo "{\"msg\": \"three\"}" +sleep 1 echo "{\"msg\": \"four\"}" +sleep 1 +echo "{\"msg\": \"five\"}" +sleep 1 +echo "{\"msg\": \"six\"}" +sleep 1 +echo "{\"msg\": \"seven\"}" +sleep 1 +echo "{\"msg\": \"eight\"}" From 6f13510129fcb9bd70083e4052fa898e14b5380e Mon Sep 17 00:00:00 2001 From: jrhizor Date: Thu, 20 May 2021 07:44:22 -0700 Subject: [PATCH 13/34] update readme --- .../connectors/source-always-works/named_pipes/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/airbyte-integrations/connectors/source-always-works/named_pipes/README.md b/airbyte-integrations/connectors/source-always-works/named_pipes/README.md index 9de676d28a38a..b8c4d8f9335a3 100644 --- a/airbyte-integrations/connectors/source-always-works/named_pipes/README.md +++ b/airbyte-integrations/connectors/source-always-works/named_pipes/README.md @@ -28,3 +28,4 @@ Todo: - Set this up for Kube - Use an init container to create the initial named pipe - Use a shared mount +- Test message sizes / etc From 0a59f840239060d5a67cb894581d6d991353e9ae Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Fri, 21 May 2021 03:23:11 -0700 Subject: [PATCH 14/34] WIP named pipe / socat sidecar kube port forwarding (#3518) * nearly working sources * update * stdin example --- .../source-always-works/named_pipes/README.md | 1 + .../named_pipes/np_dest/Dockerfile | 1 + .../named_pipes/np_source/Dockerfile | 1 + .../airbyte/scheduler/app/SchedulerApp.java | 2 +- .../process/KubeProcessBuilderFactory.java | 119 +++++++++++------- .../resources/command_fetcher_template.yaml | 0 .../main/resources/kube_runner_template.yaml | 30 ----- .../main/resources/stdin_stdout_template.yaml | 43 +++++++ .../src/main/resources/stdout_template.yaml | 43 +++++++ 9 files changed, 167 insertions(+), 73 deletions(-) create mode 100644 airbyte-workers/src/main/resources/command_fetcher_template.yaml delete mode 100644 airbyte-workers/src/main/resources/kube_runner_template.yaml create mode 100644 airbyte-workers/src/main/resources/stdin_stdout_template.yaml create mode 100644 airbyte-workers/src/main/resources/stdout_template.yaml diff --git a/airbyte-integrations/connectors/source-always-works/named_pipes/README.md b/airbyte-integrations/connectors/source-always-works/named_pipes/README.md index b8c4d8f9335a3..49e8d6fd30e5a 100644 --- a/airbyte-integrations/connectors/source-always-works/named_pipes/README.md +++ b/airbyte-integrations/connectors/source-always-works/named_pipes/README.md @@ -29,3 +29,4 @@ Todo: - Use an init container to create the initial named pipe - Use a shared mount - Test message sizes / etc +- Error code propagation diff 
--git a/airbyte-integrations/connectors/source-always-works/named_pipes/np_dest/Dockerfile b/airbyte-integrations/connectors/source-always-works/named_pipes/np_dest/Dockerfile index 9e899d21f975b..b8c641c6cefea 100644 --- a/airbyte-integrations/connectors/source-always-works/named_pipes/np_dest/Dockerfile +++ b/airbyte-integrations/connectors/source-always-works/named_pipes/np_dest/Dockerfile @@ -1,3 +1,4 @@ FROM debian:latest COPY run.sh /tmp/run.sh +ENV AIRBYTE_ENTRYPOINT="/tmp/run.sh" ENTRYPOINT /tmp/run.sh diff --git a/airbyte-integrations/connectors/source-always-works/named_pipes/np_source/Dockerfile b/airbyte-integrations/connectors/source-always-works/named_pipes/np_source/Dockerfile index 85b78a910a187..a1cfc75062f45 100644 --- a/airbyte-integrations/connectors/source-always-works/named_pipes/np_source/Dockerfile +++ b/airbyte-integrations/connectors/source-always-works/named_pipes/np_source/Dockerfile @@ -1,3 +1,4 @@ FROM alpine:latest COPY run.sh /tmp/run.sh +ENV AIRBYTE_ENTRYPOINT="/tmp/run.sh" ENTRYPOINT /tmp/run.sh diff --git a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java b/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java index a3a70cf0356bb..9931df4a4f88a 100644 --- a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java +++ b/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java @@ -161,7 +161,7 @@ private void cleanupZombies(JobPersistence jobPersistence, JobNotifier jobNotifi private static ProcessBuilderFactory getProcessBuilderFactory(Configs configs) { if (configs.getWorkerEnvironment() == Configs.WorkerEnvironment.KUBERNETES) { - return new KubeProcessBuilderFactory(configs.getWorkspaceRoot()); + return new KubeProcessBuilderFactory(); } else { return new DockerProcessBuilderFactory( configs.getWorkspaceRoot(), diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java index 612417d4af727..02f5d26034f0f 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java @@ -29,6 +29,7 @@ import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; import com.google.common.base.Joiner; import com.google.common.collect.Lists; +import io.airbyte.commons.io.IOs; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.resources.MoreResources; import io.airbyte.workers.WorkerException; @@ -54,9 +55,12 @@ import java.nio.file.Path; import java.util.Arrays; import java.util.List; +import java.util.Objects; import java.util.concurrent.TimeUnit; import java.util.function.Predicate; import java.util.stream.Collectors; + +import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.RandomStringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -64,13 +68,14 @@ public class KubeProcessBuilderFactory implements ProcessBuilderFactory { private static final Logger LOGGER = LoggerFactory.getLogger(KubeProcessBuilderFactory.class); + private final String resourceName; - private static final Path WORKSPACE_MOUNT_DESTINATION = Path.of("/workspace"); - - private final Path workspaceRoot; + public KubeProcessBuilderFactory() { + this.resourceName = null; // todo: somehow make the different types of processes configurable + } - public KubeProcessBuilderFactory(Path workspaceRoot) { - 
this.workspaceRoot = workspaceRoot; + public KubeProcessBuilderFactory(String resourceName) { + this.resourceName = resourceName; } @Override @@ -78,17 +83,21 @@ public ProcessBuilder create(String jobId, int attempt, final Path jobRoot, fina throws WorkerException { try { - final String template = MoreResources.readResource("kube_runner_template.yaml"); + final String template = MoreResources.readResource(resourceName); // used to differentiate source and destination processes with the same id and attempt final String suffix = RandomStringUtils.randomAlphabetic(5).toLowerCase(); ObjectMapper yamlMapper = new ObjectMapper(new YAMLFactory()); + String command = getCommandFromImage(imageName); + LOGGER.info("Using entrypoint from image: " + command); + final String rendered = template.replaceAll("JOBID", jobId) .replaceAll("ATTEMPTID", String.valueOf(attempt)) .replaceAll("IMAGE", imageName) .replaceAll("SUFFIX", suffix) + .replaceAll("COMMAND", command) .replaceAll("ARGS", Jsons.serialize(Arrays.asList(args))) .replaceAll("WORKDIR", jobRoot.toString()); @@ -102,7 +111,7 @@ public ProcessBuilder create(String jobId, int attempt, final Path jobRoot, fina "kubectl", "run", "--generator=run-pod/v1", - "--rm", +// "--rm", todo: add this back in "-i", "--pod-running-timeout=24h", "--image=" + imageName, @@ -118,43 +127,69 @@ public ProcessBuilder create(String jobId, int attempt, final Path jobRoot, fina } } - public static void main(String[] args) throws IOException, ApiException, InterruptedException { - var PORT = 9000; - String IP = null; - var destPodName = "destination-listen-and-echo"; - KubernetesClient client = new DefaultKubernetesClient(); - - // Load spec and create the pod. - var stream = KubeProcessBuilderFactory.class.getClassLoader().getResourceAsStream("destination-listen-and-echo.yaml"); - var destPodDef = client.pods().load(stream).get(); - LOGGER.info("Loaded spec: {}", destPodDef); - - var podSet = client.pods().inNamespace("default").list().getItems().stream() - .filter(pod -> pod.getMetadata().getName().equals(destPodName)).collect(Collectors.toSet()); - if (podSet.size() == 0) { - LOGGER.info("Pod does not exist"); - Pod destPod = client.pods().create(destPodDef); // watch command? - LOGGER.info("Created pod: {}, waiting for it to be ready", destPod); - client.resource(destPod).waitUntilReady(1, TimeUnit.MINUTES); - LOGGER.info("Dest Pod ready"); + // todo: this should really be cached + private static String getCommandFromImage(String imageName) throws IOException { + final String suffix = RandomStringUtils.randomAlphabetic(5).toLowerCase(); + + final String podName = "airbyte-command-fetcher-" + suffix; + + final List cmd = + Lists.newArrayList( + "kubectl", + "run", + "--generator=run-pod/v1", + "--rm", + "-i", + "--pod-running-timeout=24h", + "--image=" + imageName, + "--command=true", + "--restart=Never", + podName, + "--", + "sh", + "-c", + "echo \"AIRBYTE_ENTRYPOINT=$AIRBYTE_ENTRYPOINT\""); + + Process start = new ProcessBuilder(cmd).start(); + + try(BufferedReader reader = IOs.newBufferedReader(start.getInputStream())) { + String line; + while ((line = reader.readLine()) != null && !line.contains("AIRBYTE_ENTRYPOINT")); + + if (line == null || !line.contains("AIRBYTE_ENTRYPOINT")) { + throw new RuntimeException("Unable to read AIRBYTE_ENTRYPOINT from the image. 
Make sure this environment variable is set in the Dockerfile!"); + } else { + String[] splits = line.split("=", 2); + if(splits.length == 1) { + throw new RuntimeException("Unable to read AIRBYTE_ENTRYPOINT from the image. Make sure this environment variable is set in the Dockerfile!"); + } else { + return splits[1]; + } + } } + } - Pod destPod = client.pods().inNamespace("default").withName(destPodName).get(); - LOGGER.info("Found IP!"); - LOGGER.info("Status: {}", destPod.getStatus()); - LOGGER.info("IP: {}", destPod.getStatus().getPodIP()); - IP = destPod.getStatus().getPodIP(); - - // Send something! - var clientSocket = new Socket(IP, PORT); - var out = new PrintWriter(clientSocket.getOutputStream(), true); - out.print("Hello!"); - out.close(); - -// client.pods().delete(destPodDef); -// // TODO: Why does this wait not work? -// client.resource(destPodDef).waitUntilCondition(pod -> !pod.getStatus().getPhase().equals("Terminating"), 1, TimeUnit.MINUTES); - client.close(); + public static void main(String[] args) { + try { + // todo: test this with args that are used by the process + Process process = new KubeProcessBuilderFactory("stdout_template.yaml") + .create(0L, 0, Path.of("/tmp"), "np_source:dev", null) + .start(); + + process.getOutputStream().write(100); + process.getInputStream().read(); + + // after running this main: + // kubectl port-forward airbyte-worker-0-0-fmave 9000:9000 + // socat -d -d -d TCP-LISTEN:9000,bind=127.0.0.1 stdout + + LOGGER.info("waiting..."); + int code = process.waitFor(); + LOGGER.info("code = " + code); + } catch (Exception e) { + LOGGER.error(e.getMessage()); + e.printStackTrace(); + } } } diff --git a/airbyte-workers/src/main/resources/command_fetcher_template.yaml b/airbyte-workers/src/main/resources/command_fetcher_template.yaml new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/airbyte-workers/src/main/resources/kube_runner_template.yaml b/airbyte-workers/src/main/resources/kube_runner_template.yaml deleted file mode 100644 index dd2b0ba04d6e6..0000000000000 --- a/airbyte-workers/src/main/resources/kube_runner_template.yaml +++ /dev/null @@ -1,30 +0,0 @@ -apiVersion: v1 -kind: Pod -metadata: - name: airbyte-worker-JOBID-ATTEMPTID-SUFFIX -spec: - affinity: - podAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: airbyte - operator: In - values: - - scheduler - topologyKey: kubernetes.io/hostname - restartPolicy: Never - containers: - - name: worker - image: IMAGE - workingDir: WORKDIR - args: ARGS - stdin: true - stdinOnce: true - volumeMounts: - - name: airbyte-volume-workspace - mountPath: /workspace - volumes: - - name: airbyte-volume-workspace - persistentVolumeClaim: - claimName: airbyte-volume-workspace diff --git a/airbyte-workers/src/main/resources/stdin_stdout_template.yaml b/airbyte-workers/src/main/resources/stdin_stdout_template.yaml new file mode 100644 index 0000000000000..8ba5d579b3f47 --- /dev/null +++ b/airbyte-workers/src/main/resources/stdin_stdout_template.yaml @@ -0,0 +1,43 @@ +apiVersion: v1 +kind: Pod +metadata: + name: airbyte-worker-JOBID-ATTEMPTID-SUFFIX +spec: + restartPolicy: Never + initContainers: + - name: init + image: busybox:1.28 + command: [ 'sh', '-c', "mkfifo /pipes/stdin && mkfifo /pipes/stdout" ] + volumeMounts: + - name: airbyte-pipes + mountPath: /pipes + containers: + - name: worker + image: IMAGE + workingDir: WORKDIR + command: [ 'sh', '-c', "cat /pipes/stdin | COMMAND > /pipes/stdout" ] + args: ARGS + volumeMounts: +# - name: 
airbyte-volume-workspace +# mountPath: /workspace + - name: airbyte-pipes + mountPath: /pipes + - name: socat + image: alpine/socat:1.7.4.1-r1 + command: [ 'sh', '-c', "socat -d -d -d - TCP-L:9001 > /pipes/stdin" ] + env: + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + ports: + - containerPort: 9000 + volumeMounts: + - name: airbyte-pipes + mountPath: /pipes + volumes: +# - name: airbyte-volume-workspace +# persistentVolumeClaim: +# claimName: airbyte-volume-workspace + - name: airbyte-pipes + emptyDir: {} diff --git a/airbyte-workers/src/main/resources/stdout_template.yaml b/airbyte-workers/src/main/resources/stdout_template.yaml new file mode 100644 index 0000000000000..56c6afcf9cd50 --- /dev/null +++ b/airbyte-workers/src/main/resources/stdout_template.yaml @@ -0,0 +1,43 @@ +apiVersion: v1 +kind: Pod +metadata: + name: airbyte-worker-JOBID-ATTEMPTID-SUFFIX +spec: + restartPolicy: Never + initContainers: + - name: init + image: busybox:1.28 + command: [ 'sh', '-c', "mkfifo /pipes/stdin && mkfifo /pipes/stdout" ] + volumeMounts: + - name: airbyte-pipes + mountPath: /pipes + containers: + - name: worker + image: IMAGE + workingDir: WORKDIR + command: [ 'sh', '-c', "COMMAND > /pipes/stdout" ] + args: ARGS + volumeMounts: + # - name: airbyte-volume-workspace + # mountPath: /workspace + - name: airbyte-pipes + mountPath: /pipes + - name: socat + image: alpine/socat:1.7.4.1-r1 + command: [ 'sh', '-c', "cat /pipes/stdout | socat -d -d -d - TCP:host.docker.internal:9000" ] # todo: pass in the sync worker ip + env: + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + ports: + - containerPort: 9000 + volumeMounts: + - name: airbyte-pipes + mountPath: /pipes + volumes: + # - name: airbyte-volume-workspace + # persistentVolumeClaim: + # claimName: airbyte-volume-workspace + - name: airbyte-pipes + emptyDir: {} From c4d976b74e3bb9dc2cd3e35748f1c80e2b31db3b Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Fri, 21 May 2021 18:54:07 +0800 Subject: [PATCH 15/34] move all kube testing yamls into the airbyte-workers directories. sort the airbyte-workers resource folder; place all the poc yamls together. 
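(Aside on the stdin/stdout templates gathered above: the `socat - TCP-L:9001 > /pipes/stdin` sidecar implies a worker-side writer that connects to the pod and streams the connector's stdin. Below is a minimal, hypothetical sketch, assuming the pod IP has already been resolved and that 9001 is the sidecar's listen port from `stdin_stdout_template.yaml`.)

```
import java.io.PrintWriter;
import java.net.Socket;

// Hypothetical worker-side stdin feeder: connects to the socat sidecar listening on
// port 9001 inside the connector pod; the sidecar copies the bytes into /pipes/stdin.
public class PodStdinFeeder {

  public static void main(String[] args) throws Exception {
    String destPodIp = args.length > 0 ? args[0] : "10.0.0.1"; // assumed, resolved elsewhere
    try (Socket toPod = new Socket(destPodIp, 9001);
        PrintWriter stdin = new PrintWriter(toPod.getOutputStream(), true)) {
      stdin.println("{\"msg\": \"hello from the worker\"}"); // placeholder payload
    } // closing the socket lets the sidecar, and thus the connector, see EOF on stdin
  }
}
```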
--- .../docker-shim-mvp/README.md | 0 .../docker-shim-mvp/destination-listen-and-echo/Dockerfile | 0 .../destination-listen-and-echo/listen-and-echo.sh | 0 .../docker-shim-mvp/docker-shim-base/Dockerfile | 0 .../docker-shim-mvp/docker-shim-base/socat.sh | 0 .../source-always-works => airbyte-workers}/named_pipes/README.md | 0 .../named_pipes/np_dest/Dockerfile | 0 .../named_pipes/np_dest/run.sh | 0 .../named_pipes/np_source/Dockerfile | 0 .../named_pipes/np_source/run.sh | 0 .../resources/{ => kube_queue_poc}/command_fetcher_template.yaml | 0 .../{ => kube_queue_poc}/default_service_account_perms.yaml | 0 .../{ => kube_queue_poc}/destination-listen-and-echo.yaml | 0 .../main/resources/{ => kube_queue_poc}/kube-sync-workers.yaml | 0 .../resources/{ => kube_queue_poc}/stdin_stdout_template.yaml | 0 .../src/main/resources/{ => kube_queue_poc}/stdout_template.yaml | 0 16 files changed, 0 insertions(+), 0 deletions(-) rename {airbyte-integrations/connectors/source-always-works => airbyte-workers}/docker-shim-mvp/README.md (100%) rename {airbyte-integrations/connectors/source-always-works => airbyte-workers}/docker-shim-mvp/destination-listen-and-echo/Dockerfile (100%) rename {airbyte-integrations/connectors/source-always-works => airbyte-workers}/docker-shim-mvp/destination-listen-and-echo/listen-and-echo.sh (100%) rename {airbyte-integrations/connectors/source-always-works => airbyte-workers}/docker-shim-mvp/docker-shim-base/Dockerfile (100%) rename {airbyte-integrations/connectors/source-always-works => airbyte-workers}/docker-shim-mvp/docker-shim-base/socat.sh (100%) rename {airbyte-integrations/connectors/source-always-works => airbyte-workers}/named_pipes/README.md (100%) rename {airbyte-integrations/connectors/source-always-works => airbyte-workers}/named_pipes/np_dest/Dockerfile (100%) rename {airbyte-integrations/connectors/source-always-works => airbyte-workers}/named_pipes/np_dest/run.sh (100%) rename {airbyte-integrations/connectors/source-always-works => airbyte-workers}/named_pipes/np_source/Dockerfile (100%) rename {airbyte-integrations/connectors/source-always-works => airbyte-workers}/named_pipes/np_source/run.sh (100%) rename airbyte-workers/src/main/resources/{ => kube_queue_poc}/command_fetcher_template.yaml (100%) rename airbyte-workers/src/main/resources/{ => kube_queue_poc}/default_service_account_perms.yaml (100%) rename airbyte-workers/src/main/resources/{ => kube_queue_poc}/destination-listen-and-echo.yaml (100%) rename airbyte-workers/src/main/resources/{ => kube_queue_poc}/kube-sync-workers.yaml (100%) rename airbyte-workers/src/main/resources/{ => kube_queue_poc}/stdin_stdout_template.yaml (100%) rename airbyte-workers/src/main/resources/{ => kube_queue_poc}/stdout_template.yaml (100%) diff --git a/airbyte-integrations/connectors/source-always-works/docker-shim-mvp/README.md b/airbyte-workers/docker-shim-mvp/README.md similarity index 100% rename from airbyte-integrations/connectors/source-always-works/docker-shim-mvp/README.md rename to airbyte-workers/docker-shim-mvp/README.md diff --git a/airbyte-integrations/connectors/source-always-works/docker-shim-mvp/destination-listen-and-echo/Dockerfile b/airbyte-workers/docker-shim-mvp/destination-listen-and-echo/Dockerfile similarity index 100% rename from airbyte-integrations/connectors/source-always-works/docker-shim-mvp/destination-listen-and-echo/Dockerfile rename to airbyte-workers/docker-shim-mvp/destination-listen-and-echo/Dockerfile diff --git 
a/airbyte-integrations/connectors/source-always-works/docker-shim-mvp/destination-listen-and-echo/listen-and-echo.sh b/airbyte-workers/docker-shim-mvp/destination-listen-and-echo/listen-and-echo.sh similarity index 100% rename from airbyte-integrations/connectors/source-always-works/docker-shim-mvp/destination-listen-and-echo/listen-and-echo.sh rename to airbyte-workers/docker-shim-mvp/destination-listen-and-echo/listen-and-echo.sh diff --git a/airbyte-integrations/connectors/source-always-works/docker-shim-mvp/docker-shim-base/Dockerfile b/airbyte-workers/docker-shim-mvp/docker-shim-base/Dockerfile similarity index 100% rename from airbyte-integrations/connectors/source-always-works/docker-shim-mvp/docker-shim-base/Dockerfile rename to airbyte-workers/docker-shim-mvp/docker-shim-base/Dockerfile diff --git a/airbyte-integrations/connectors/source-always-works/docker-shim-mvp/docker-shim-base/socat.sh b/airbyte-workers/docker-shim-mvp/docker-shim-base/socat.sh similarity index 100% rename from airbyte-integrations/connectors/source-always-works/docker-shim-mvp/docker-shim-base/socat.sh rename to airbyte-workers/docker-shim-mvp/docker-shim-base/socat.sh diff --git a/airbyte-integrations/connectors/source-always-works/named_pipes/README.md b/airbyte-workers/named_pipes/README.md similarity index 100% rename from airbyte-integrations/connectors/source-always-works/named_pipes/README.md rename to airbyte-workers/named_pipes/README.md diff --git a/airbyte-integrations/connectors/source-always-works/named_pipes/np_dest/Dockerfile b/airbyte-workers/named_pipes/np_dest/Dockerfile similarity index 100% rename from airbyte-integrations/connectors/source-always-works/named_pipes/np_dest/Dockerfile rename to airbyte-workers/named_pipes/np_dest/Dockerfile diff --git a/airbyte-integrations/connectors/source-always-works/named_pipes/np_dest/run.sh b/airbyte-workers/named_pipes/np_dest/run.sh similarity index 100% rename from airbyte-integrations/connectors/source-always-works/named_pipes/np_dest/run.sh rename to airbyte-workers/named_pipes/np_dest/run.sh diff --git a/airbyte-integrations/connectors/source-always-works/named_pipes/np_source/Dockerfile b/airbyte-workers/named_pipes/np_source/Dockerfile similarity index 100% rename from airbyte-integrations/connectors/source-always-works/named_pipes/np_source/Dockerfile rename to airbyte-workers/named_pipes/np_source/Dockerfile diff --git a/airbyte-integrations/connectors/source-always-works/named_pipes/np_source/run.sh b/airbyte-workers/named_pipes/np_source/run.sh similarity index 100% rename from airbyte-integrations/connectors/source-always-works/named_pipes/np_source/run.sh rename to airbyte-workers/named_pipes/np_source/run.sh diff --git a/airbyte-workers/src/main/resources/command_fetcher_template.yaml b/airbyte-workers/src/main/resources/kube_queue_poc/command_fetcher_template.yaml similarity index 100% rename from airbyte-workers/src/main/resources/command_fetcher_template.yaml rename to airbyte-workers/src/main/resources/kube_queue_poc/command_fetcher_template.yaml diff --git a/airbyte-workers/src/main/resources/default_service_account_perms.yaml b/airbyte-workers/src/main/resources/kube_queue_poc/default_service_account_perms.yaml similarity index 100% rename from airbyte-workers/src/main/resources/default_service_account_perms.yaml rename to airbyte-workers/src/main/resources/kube_queue_poc/default_service_account_perms.yaml diff --git a/airbyte-workers/src/main/resources/destination-listen-and-echo.yaml 
b/airbyte-workers/src/main/resources/kube_queue_poc/destination-listen-and-echo.yaml similarity index 100% rename from airbyte-workers/src/main/resources/destination-listen-and-echo.yaml rename to airbyte-workers/src/main/resources/kube_queue_poc/destination-listen-and-echo.yaml diff --git a/airbyte-workers/src/main/resources/kube-sync-workers.yaml b/airbyte-workers/src/main/resources/kube_queue_poc/kube-sync-workers.yaml similarity index 100% rename from airbyte-workers/src/main/resources/kube-sync-workers.yaml rename to airbyte-workers/src/main/resources/kube_queue_poc/kube-sync-workers.yaml diff --git a/airbyte-workers/src/main/resources/stdin_stdout_template.yaml b/airbyte-workers/src/main/resources/kube_queue_poc/stdin_stdout_template.yaml similarity index 100% rename from airbyte-workers/src/main/resources/stdin_stdout_template.yaml rename to airbyte-workers/src/main/resources/kube_queue_poc/stdin_stdout_template.yaml diff --git a/airbyte-workers/src/main/resources/stdout_template.yaml b/airbyte-workers/src/main/resources/kube_queue_poc/stdout_template.yaml similarity index 100% rename from airbyte-workers/src/main/resources/stdout_template.yaml rename to airbyte-workers/src/main/resources/kube_queue_poc/stdout_template.yaml From 16fca70b090d571e088362cf244e341c290973c9 Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Fri, 21 May 2021 19:05:52 +0800 Subject: [PATCH 16/34] Format. --- .../process/KubeProcessBuilderFactory.java | 93 +--------- .../process/KubeProcessBuilderFactoryPOC.java | 161 ++++++++++++++++++ .../command_fetcher_template.yaml | 1 + .../destination-listen-and-echo.yaml | 18 +- .../kube_queue_poc/kube-sync-workers.yaml | 6 +- .../kube_queue_poc/stdin_stdout_template.yaml | 16 +- .../kube_queue_poc/stdout_template.yaml | 10 +- 7 files changed, 190 insertions(+), 115 deletions(-) create mode 100644 airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java index 02f5d26034f0f..6ee1678edf68e 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java @@ -29,38 +29,12 @@ import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; import com.google.common.base.Joiner; import com.google.common.collect.Lists; -import io.airbyte.commons.io.IOs; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.resources.MoreResources; import io.airbyte.workers.WorkerException; -import io.fabric8.kubernetes.api.model.Pod; -import io.fabric8.kubernetes.api.model.PodBuilder; -import io.fabric8.kubernetes.api.model.PodList; -import io.fabric8.kubernetes.api.model.PodSpec; -import io.fabric8.kubernetes.client.DefaultKubernetesClient; -import io.fabric8.kubernetes.client.KubernetesClient; -import io.kubernetes.client.openapi.ApiClient; -import io.kubernetes.client.openapi.ApiException; -import io.kubernetes.client.openapi.Configuration; -import io.kubernetes.client.openapi.apis.CoreV1Api; -import io.kubernetes.client.openapi.models.V1Pod; -import io.kubernetes.client.openapi.models.V1PodList; -import io.kubernetes.client.util.Config; -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.PrintWriter; -import java.net.Socket; -import java.net.URL; import java.nio.file.Path; import 
java.util.Arrays; import java.util.List; -import java.util.Objects; -import java.util.concurrent.TimeUnit; -import java.util.function.Predicate; -import java.util.stream.Collectors; - -import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.RandomStringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -111,7 +85,7 @@ public ProcessBuilder create(String jobId, int attempt, final Path jobRoot, fina "kubectl", "run", "--generator=run-pod/v1", -// "--rm", todo: add this back in + // "--rm", todo: add this back in "-i", "--pod-running-timeout=24h", "--image=" + imageName, @@ -127,69 +101,4 @@ public ProcessBuilder create(String jobId, int attempt, final Path jobRoot, fina } } - // todo: this should really be cached - private static String getCommandFromImage(String imageName) throws IOException { - final String suffix = RandomStringUtils.randomAlphabetic(5).toLowerCase(); - - final String podName = "airbyte-command-fetcher-" + suffix; - - final List cmd = - Lists.newArrayList( - "kubectl", - "run", - "--generator=run-pod/v1", - "--rm", - "-i", - "--pod-running-timeout=24h", - "--image=" + imageName, - "--command=true", - "--restart=Never", - podName, - "--", - "sh", - "-c", - "echo \"AIRBYTE_ENTRYPOINT=$AIRBYTE_ENTRYPOINT\""); - - Process start = new ProcessBuilder(cmd).start(); - - try(BufferedReader reader = IOs.newBufferedReader(start.getInputStream())) { - String line; - while ((line = reader.readLine()) != null && !line.contains("AIRBYTE_ENTRYPOINT")); - - if (line == null || !line.contains("AIRBYTE_ENTRYPOINT")) { - throw new RuntimeException("Unable to read AIRBYTE_ENTRYPOINT from the image. Make sure this environment variable is set in the Dockerfile!"); - } else { - String[] splits = line.split("=", 2); - if(splits.length == 1) { - throw new RuntimeException("Unable to read AIRBYTE_ENTRYPOINT from the image. 
Make sure this environment variable is set in the Dockerfile!"); - } else { - return splits[1]; - } - } - } - } - - public static void main(String[] args) { - try { - // todo: test this with args that are used by the process - Process process = new KubeProcessBuilderFactory("stdout_template.yaml") - .create(0L, 0, Path.of("/tmp"), "np_source:dev", null) - .start(); - - process.getOutputStream().write(100); - process.getInputStream().read(); - - // after running this main: - // kubectl port-forward airbyte-worker-0-0-fmave 9000:9000 - // socat -d -d -d TCP-LISTEN:9000,bind=127.0.0.1 stdout - - LOGGER.info("waiting..."); - int code = process.waitFor(); - LOGGER.info("code = " + code); - } catch (Exception e) { - LOGGER.error(e.getMessage()); - e.printStackTrace(); - } - } - } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java new file mode 100644 index 0000000000000..472b20f580b10 --- /dev/null +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java @@ -0,0 +1,161 @@ +/* + * MIT License + * + * Copyright (c) 2020 Airbyte + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +package io.airbyte.workers.process; + +import com.google.common.collect.Lists; +import io.airbyte.commons.io.IOs; +import io.fabric8.kubernetes.api.model.Pod; +import io.fabric8.kubernetes.api.model.PodList; +import io.fabric8.kubernetes.client.DefaultKubernetesClient; +import io.fabric8.kubernetes.client.KubernetesClient; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.PrintWriter; +import java.net.Socket; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import org.apache.commons.lang3.RandomStringUtils; + +public class KubeProcessBuilderFactoryPOC { + + // todo: this should really be cached + private static String getCommandFromImage(String imageName) throws IOException { + final String suffix = RandomStringUtils.randomAlphabetic(5).toLowerCase(); + + final String podName = "airbyte-command-fetcher-" + suffix; + + final List cmd = + Lists.newArrayList( + "kubectl", + "run", + "--generator=run-pod/v1", + "--rm", + "-i", + "--pod-running-timeout=24h", + "--image=" + imageName, + "--command=true", + "--restart=Never", + podName, + "--", + "sh", + "-c", + "echo \"AIRBYTE_ENTRYPOINT=$AIRBYTE_ENTRYPOINT\""); + + Process start = new ProcessBuilder(cmd).start(); + + try (BufferedReader reader = IOs.newBufferedReader(start.getInputStream())) { + String line; + while ((line = reader.readLine()) != null && !line.contains("AIRBYTE_ENTRYPOINT")); + + if (line == null || !line.contains("AIRBYTE_ENTRYPOINT")) { + throw new RuntimeException("Unable to read AIRBYTE_ENTRYPOINT from the image. Make sure this environment variable is set in the Dockerfile!"); + } else { + String[] splits = line.split("=", 2); + if (splits.length == 1) { + throw new RuntimeException( + "Unable to read AIRBYTE_ENTRYPOINT from the image. Make sure this environment variable is set in the Dockerfile!"); + } else { + return splits[1]; + } + } + } + } + + public static void main(String[] args) throws InterruptedException, IOException { + // try { + // // todo: test this with args that are used by the process + // Process process = new KubeProcessBuilderFactory("stdout_template.yaml") + // .create(0L, 0, Path.of("/tmp"), "np_source:dev", null) + // .start(); + // + // process.getOutputStream().write(100); + // process.getInputStream().read(); + // + // // after running this main: + // // kubectl port-forward airbyte-worker-0-0-fmave 9000:9000 + // // socat -d -d -d TCP-LISTEN:9000,bind=127.0.0.1 stdout + // + // LOGGER.info("waiting..."); + // int code = process.waitFor(); + // LOGGER.info("code = " + code); + // } catch (Exception e) { + // LOGGER.error(e.getMessage()); + // e.printStackTrace(); + // } + + var PORT = 9000; + String IP = null; + var destPodName = "destination-listen-and-echo"; + KubernetesClient client = new DefaultKubernetesClient(); + + // Load spec and create the pod. 
+ var stream = KubeProcessBuilderFactory.class.getClassLoader().getResourceAsStream("destination-listen-and-echo.yaml"); + var destPodDef = client.pods().load(stream).get(); + LOGGER.info("Loaded spec: {}", destPodDef); + + var podSet = client.pods().inNamespace("default").list().getItems().stream() + .filter(pod -> pod.getMetadata().getName().equals(destPodName)).collect(Collectors.toSet()); + if (podSet.size() == 0) { + LOGGER.info("Pod does not exist"); + Pod destPod = client.pods().create(destPodDef); + LOGGER.info("Created pod: {}, waiting for it to be ready", destPod); + client.resource(destPod).waitUntilReady(1, TimeUnit.MINUTES); + LOGGER.info("Dest Pod ready"); + } + + // TODO: Why does this not work? + // LOGGER.info(destPod.getStatus().getPodIP()); + // destPod = client.resource(destPod).get(); + // LOGGER.info("Status: {}", destPod.getStatus()); + // LOGGER.info("IP: {}", destPod.getStatus().getPodIP()); + // IP = destPod.getStatus().getPodIP(); + + // TODO: Assign labels to pods to narrow the search. + PodList pods = client.pods().inNamespace("default").list(); + for (Pod p : pods.getItems()) { + LOGGER.info(p.getMetadata().getName()); + LOGGER.info(p.getStatus().getPodIP()); + // Filter by pod and retrieve IP. + if (p.getMetadata().getName().equals(destPodName)) { + LOGGER.info("Found IP!"); + IP = p.getStatus().getPodIP(); + break; + } + } + + // Send something! + var clientSocket = new Socket(IP, PORT); + var out = new PrintWriter(clientSocket.getOutputStream(), true); + out.print("Hello!"); + out.close(); + + client.pods().delete(destPodDef); + // TODO: Why does this wait not work? + client.resource(destPodDef).waitUntilCondition(pod -> !pod.getStatus().getPhase().equals("Terminating"), 1, TimeUnit.MINUTES); + client.close(); + } + +} diff --git a/airbyte-workers/src/main/resources/kube_queue_poc/command_fetcher_template.yaml b/airbyte-workers/src/main/resources/kube_queue_poc/command_fetcher_template.yaml index e69de29bb2d1d..ed97d539c095c 100644 --- a/airbyte-workers/src/main/resources/kube_queue_poc/command_fetcher_template.yaml +++ b/airbyte-workers/src/main/resources/kube_queue_poc/command_fetcher_template.yaml @@ -0,0 +1 @@ +--- diff --git a/airbyte-workers/src/main/resources/kube_queue_poc/destination-listen-and-echo.yaml b/airbyte-workers/src/main/resources/kube_queue_poc/destination-listen-and-echo.yaml index 5cb14ce527daf..d8610aa381684 100644 --- a/airbyte-workers/src/main/resources/kube_queue_poc/destination-listen-and-echo.yaml +++ b/airbyte-workers/src/main/resources/kube_queue_poc/destination-listen-and-echo.yaml @@ -6,12 +6,12 @@ metadata: spec: restartPolicy: Never containers: - - name: destination-listen-and-echo - image: airbyte/destination-listen-and-echo:dev - ports: - - containerPort: 9000 - env: - - name: DEST_PORT - value: '9000' - - name: DEST_IP - value: '0.0.0.0' + - name: destination-listen-and-echo + image: airbyte/destination-listen-and-echo:dev + ports: + - containerPort: 9000 + env: + - name: DEST_PORT + value: "9000" + - name: DEST_IP + value: "0.0.0.0" diff --git a/airbyte-workers/src/main/resources/kube_queue_poc/kube-sync-workers.yaml b/airbyte-workers/src/main/resources/kube_queue_poc/kube-sync-workers.yaml index 873c94e4ecc74..297c33cce8788 100644 --- a/airbyte-workers/src/main/resources/kube_queue_poc/kube-sync-workers.yaml +++ b/airbyte-workers/src/main/resources/kube_queue_poc/kube-sync-workers.yaml @@ -5,6 +5,6 @@ metadata: spec: restartPolicy: Never containers: - - name: kube-sync-worker-test - image: 
airbyte/kube-sync-worker-test:dev - imagePullPolicy: Always + - name: kube-sync-worker-test + image: airbyte/kube-sync-worker-test:dev + imagePullPolicy: Always diff --git a/airbyte-workers/src/main/resources/kube_queue_poc/stdin_stdout_template.yaml b/airbyte-workers/src/main/resources/kube_queue_poc/stdin_stdout_template.yaml index 8ba5d579b3f47..94d8e179636af 100644 --- a/airbyte-workers/src/main/resources/kube_queue_poc/stdin_stdout_template.yaml +++ b/airbyte-workers/src/main/resources/kube_queue_poc/stdin_stdout_template.yaml @@ -7,7 +7,7 @@ spec: initContainers: - name: init image: busybox:1.28 - command: [ 'sh', '-c', "mkfifo /pipes/stdin && mkfifo /pipes/stdout" ] + command: ["sh", "-c", "mkfifo /pipes/stdin && mkfifo /pipes/stdout"] volumeMounts: - name: airbyte-pipes mountPath: /pipes @@ -15,16 +15,16 @@ spec: - name: worker image: IMAGE workingDir: WORKDIR - command: [ 'sh', '-c', "cat /pipes/stdin | COMMAND > /pipes/stdout" ] + command: ["sh", "-c", "cat /pipes/stdin | COMMAND > /pipes/stdout"] args: ARGS volumeMounts: -# - name: airbyte-volume-workspace -# mountPath: /workspace + # - name: airbyte-volume-workspace + # mountPath: /workspace - name: airbyte-pipes mountPath: /pipes - name: socat image: alpine/socat:1.7.4.1-r1 - command: [ 'sh', '-c', "socat -d -d -d - TCP-L:9001 > /pipes/stdin" ] + command: ["sh", "-c", "socat -d -d -d - TCP-L:9001 > /pipes/stdin"] env: - name: POD_IP valueFrom: @@ -36,8 +36,8 @@ spec: - name: airbyte-pipes mountPath: /pipes volumes: -# - name: airbyte-volume-workspace -# persistentVolumeClaim: -# claimName: airbyte-volume-workspace + # - name: airbyte-volume-workspace + # persistentVolumeClaim: + # claimName: airbyte-volume-workspace - name: airbyte-pipes emptyDir: {} diff --git a/airbyte-workers/src/main/resources/kube_queue_poc/stdout_template.yaml b/airbyte-workers/src/main/resources/kube_queue_poc/stdout_template.yaml index 56c6afcf9cd50..66800800a02e2 100644 --- a/airbyte-workers/src/main/resources/kube_queue_poc/stdout_template.yaml +++ b/airbyte-workers/src/main/resources/kube_queue_poc/stdout_template.yaml @@ -7,7 +7,7 @@ spec: initContainers: - name: init image: busybox:1.28 - command: [ 'sh', '-c', "mkfifo /pipes/stdin && mkfifo /pipes/stdout" ] + command: ["sh", "-c", "mkfifo /pipes/stdin && mkfifo /pipes/stdout"] volumeMounts: - name: airbyte-pipes mountPath: /pipes @@ -15,7 +15,7 @@ spec: - name: worker image: IMAGE workingDir: WORKDIR - command: [ 'sh', '-c', "COMMAND > /pipes/stdout" ] + command: ["sh", "-c", "COMMAND > /pipes/stdout"] args: ARGS volumeMounts: # - name: airbyte-volume-workspace @@ -24,7 +24,11 @@ spec: mountPath: /pipes - name: socat image: alpine/socat:1.7.4.1-r1 - command: [ 'sh', '-c', "cat /pipes/stdout | socat -d -d -d - TCP:host.docker.internal:9000" ] # todo: pass in the sync worker ip + command: [ + "sh", + "-c", + "cat /pipes/stdout | socat -d -d -d - TCP:host.docker.internal:9000", + ] # todo: pass in the sync worker ip env: - name: POD_IP valueFrom: From 01be8ef90ec9b3672fcbfede96345e1be14548b2 Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Fri, 21 May 2021 19:07:46 +0800 Subject: [PATCH 17/34] Put back the original KubeProcessBuilderFactory. 
--- .../process/KubeProcessBuilderFactory.java | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java index 6ee1678edf68e..83af111cc8c52 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactory.java @@ -42,14 +42,13 @@ public class KubeProcessBuilderFactory implements ProcessBuilderFactory { private static final Logger LOGGER = LoggerFactory.getLogger(KubeProcessBuilderFactory.class); - private final String resourceName; - public KubeProcessBuilderFactory() { - this.resourceName = null; // todo: somehow make the different types of processes configurable - } + private static final Path WORKSPACE_MOUNT_DESTINATION = Path.of("/workspace"); + + private final Path workspaceRoot; - public KubeProcessBuilderFactory(String resourceName) { - this.resourceName = resourceName; + public KubeProcessBuilderFactory(Path workspaceRoot) { + this.workspaceRoot = workspaceRoot; } @Override @@ -57,21 +56,17 @@ public ProcessBuilder create(String jobId, int attempt, final Path jobRoot, fina throws WorkerException { try { - final String template = MoreResources.readResource(resourceName); + final String template = MoreResources.readResource("kube_runner_template.yaml"); // used to differentiate source and destination processes with the same id and attempt final String suffix = RandomStringUtils.randomAlphabetic(5).toLowerCase(); ObjectMapper yamlMapper = new ObjectMapper(new YAMLFactory()); - String command = getCommandFromImage(imageName); - LOGGER.info("Using entrypoint from image: " + command); - final String rendered = template.replaceAll("JOBID", jobId) .replaceAll("ATTEMPTID", String.valueOf(attempt)) .replaceAll("IMAGE", imageName) .replaceAll("SUFFIX", suffix) - .replaceAll("COMMAND", command) .replaceAll("ARGS", Jsons.serialize(Arrays.asList(args))) .replaceAll("WORKDIR", jobRoot.toString()); @@ -85,7 +80,7 @@ public ProcessBuilder create(String jobId, int attempt, final Path jobRoot, fina "kubectl", "run", "--generator=run-pod/v1", - // "--rm", todo: add this back in + "--rm", "-i", "--pod-running-timeout=24h", "--image=" + imageName, From e8849f76bd352154273430ade8bf329d20a2bec3 Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Fri, 21 May 2021 19:11:17 +0800 Subject: [PATCH 18/34] Fix slight errors. 
--- .../src/main/java/io/airbyte/scheduler/app/SchedulerApp.java | 2 +- .../airbyte/workers/process/KubeProcessBuilderFactoryPOC.java | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java b/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java index 9931df4a4f88a..a3a70cf0356bb 100644 --- a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java +++ b/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java @@ -161,7 +161,7 @@ private void cleanupZombies(JobPersistence jobPersistence, JobNotifier jobNotifi private static ProcessBuilderFactory getProcessBuilderFactory(Configs configs) { if (configs.getWorkerEnvironment() == Configs.WorkerEnvironment.KUBERNETES) { - return new KubeProcessBuilderFactory(); + return new KubeProcessBuilderFactory(configs.getWorkspaceRoot()); } else { return new DockerProcessBuilderFactory( configs.getWorkspaceRoot(), diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java index 472b20f580b10..0a6075f75b593 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java @@ -38,8 +38,11 @@ import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import org.apache.commons.lang3.RandomStringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class KubeProcessBuilderFactoryPOC { + private static final Logger LOGGER = LoggerFactory.getLogger(KubeProcessBuilderFactoryPOC.class); // todo: this should really be cached private static String getCommandFromImage(String imageName) throws IOException { From 2cdb8f59693a0eb9e95ae0bc24aedb147bb6d643 Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Fri, 21 May 2021 19:53:47 +0800 Subject: [PATCH 19/34] Checkpoint: Worker pod knows its own IP. Successfully starts and writes to Dest pod after refactor. 
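(One hedged way for a worker pod to learn its own IP, sketched for reference rather than taken from this patch: read a `POD_IP` environment variable injected through the Kubernetes downward API, as the socat sidecar templates above already wire up, and fall back to `InetAddress.getLocalHost()` when running outside the cluster.)

```
import java.net.InetAddress;
import java.net.UnknownHostException;

// Hypothetical helper: prefer the downward-API env var (fieldRef: status.podIP),
// fall back to the JVM's view of the local address for non-Kubernetes runs.
public class SelfIp {

  static String resolve() throws UnknownHostException {
    String fromDownwardApi = System.getenv("POD_IP"); // assumed to be set in the pod spec
    if (fromDownwardApi != null && !fromDownwardApi.isBlank()) {
      return fromDownwardApi;
    }
    return InetAddress.getLocalHost().getHostAddress();
  }

  public static void main(String[] args) throws UnknownHostException {
    System.out.println("worker ip: " + resolve());
  }
}
```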
--- airbyte-workers/build.gradle | 2 +- .../process/KubeProcessBuilderFactoryPOC.java | 112 +++++++++++------- 2 files changed, 67 insertions(+), 47 deletions(-) diff --git a/airbyte-workers/build.gradle b/airbyte-workers/build.gradle index c8f27bfa3026c..6cebee9f80d21 100644 --- a/airbyte-workers/build.gradle +++ b/airbyte-workers/build.gradle @@ -7,7 +7,7 @@ plugins { } application { - mainClass = 'io.airbyte.workers.process.KubeProcessBuilderFactory' + mainClass = 'io.airbyte.workers.process.KubeProcessBuilderFactoryPOC' } configurations { diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java index 0a6075f75b593..0fa299481ab7c 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java @@ -33,7 +33,9 @@ import java.io.BufferedReader; import java.io.IOException; import java.io.PrintWriter; +import java.net.InetAddress; import java.net.Socket; +import java.nio.file.Path; import java.util.List; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; @@ -42,8 +44,12 @@ import org.slf4j.LoggerFactory; public class KubeProcessBuilderFactoryPOC { + private static final Logger LOGGER = LoggerFactory.getLogger(KubeProcessBuilderFactoryPOC.class); + private static final KubernetesClient KUBE_CLIENT = new DefaultKubernetesClient(); + private static final int PORT = 9000; + // todo: this should really be cached private static String getCommandFromImage(String imageName) throws IOException { final String suffix = RandomStringUtils.randomAlphabetic(5).toLowerCase(); @@ -87,48 +93,32 @@ private static String getCommandFromImage(String imageName) throws IOException { } } - public static void main(String[] args) throws InterruptedException, IOException { - // try { - // // todo: test this with args that are used by the process - // Process process = new KubeProcessBuilderFactory("stdout_template.yaml") - // .create(0L, 0, Path.of("/tmp"), "np_source:dev", null) - // .start(); - // - // process.getOutputStream().write(100); - // process.getInputStream().read(); - // - // // after running this main: - // // kubectl port-forward airbyte-worker-0-0-fmave 9000:9000 - // // socat -d -d -d TCP-LISTEN:9000,bind=127.0.0.1 stdout - // - // LOGGER.info("waiting..."); - // int code = process.waitFor(); - // LOGGER.info("code = " + code); - // } catch (Exception e) { - // LOGGER.error(e.getMessage()); - // e.printStackTrace(); - // } - - var PORT = 9000; - String IP = null; - var destPodName = "destination-listen-and-echo"; - KubernetesClient client = new DefaultKubernetesClient(); + private static void createPodAndWaitTillReady(String imageId) {} - // Load spec and create the pod. 
- var stream = KubeProcessBuilderFactory.class.getClassLoader().getResourceAsStream("destination-listen-and-echo.yaml"); - var destPodDef = client.pods().load(stream).get(); - LOGGER.info("Loaded spec: {}", destPodDef); + private static void saveJaredWork() { + try { + // todo: test this with args that are used by the process + Process process = new KubeProcessBuilderFactory(Path.of("stdout_template.yaml")) + .create(0L, 0, Path.of("/tmp"), "np_source:dev", null) + .start(); - var podSet = client.pods().inNamespace("default").list().getItems().stream() - .filter(pod -> pod.getMetadata().getName().equals(destPodName)).collect(Collectors.toSet()); - if (podSet.size() == 0) { - LOGGER.info("Pod does not exist"); - Pod destPod = client.pods().create(destPodDef); - LOGGER.info("Created pod: {}, waiting for it to be ready", destPod); - client.resource(destPod).waitUntilReady(1, TimeUnit.MINUTES); - LOGGER.info("Dest Pod ready"); + process.getOutputStream().write(100); + process.getInputStream().read(); + + // after running this main: + // kubectl port-forward airbyte-worker-0-0-fmave 9000:9000 + // socat -d -d -d TCP-LISTEN:9000,bind=127.0.0.1 stdout + + LOGGER.info("waiting..."); + int code = process.waitFor(); + LOGGER.info("code = " + code); + } catch (Exception e) { + LOGGER.error(e.getMessage()); + e.printStackTrace(); } + } + private static String getPodIP(String podName) { // TODO: Why does this not work? // LOGGER.info(destPod.getStatus().getPodIP()); // destPod = client.resource(destPod).get(); @@ -137,28 +127,58 @@ public static void main(String[] args) throws InterruptedException, IOException // IP = destPod.getStatus().getPodIP(); // TODO: Assign labels to pods to narrow the search. - PodList pods = client.pods().inNamespace("default").list(); + PodList pods = KUBE_CLIENT.pods().inNamespace("default").list(); for (Pod p : pods.getItems()) { LOGGER.info(p.getMetadata().getName()); LOGGER.info(p.getStatus().getPodIP()); // Filter by pod and retrieve IP. - if (p.getMetadata().getName().equals(destPodName)) { + if (p.getMetadata().getName().equals(podName)) { LOGGER.info("Found IP!"); - IP = p.getStatus().getPodIP(); - break; + return p.getStatus().getPodIP(); } } + return null; + } + + private static void createIfNotExisting(String podName, Pod def) throws InterruptedException { + LOGGER.info("Checking pod: {}", podName); + var podSet = KUBE_CLIENT.pods().inNamespace("default").list().getItems().stream() + .filter(pod -> pod.getMetadata().getName().equals(podName)).collect(Collectors.toSet()); + if (podSet.size() == 0) { + LOGGER.info("Pod {} does not exist", podName); + Pod destPod = KUBE_CLIENT.pods().create(def); + LOGGER.info("Created pod: {}, waiting for it to be ready", destPod); + KUBE_CLIENT.resource(destPod).waitUntilReady(1, TimeUnit.MINUTES); + LOGGER.info("Pod {} ready", podName); + } + } + + public static void main(String[] args) throws InterruptedException, IOException { + String myIp = InetAddress.getLocalHost().getHostAddress(); + LOGGER.info("Kube sync worker ip: {}", myIp); + + var destPodName = "destination-listen-and-echo"; + + // Load spec and create the pod. + var stream = KubeProcessBuilderFactoryPOC.class.getClassLoader().getResourceAsStream("kube_queue_poc/destination-listen-and-echo.yaml"); + var destPodDef = KUBE_CLIENT.pods().load(stream).get(); + LOGGER.info("Loaded spec: {}", destPodDef); + + createIfNotExisting(destPodName, destPodDef); + String destPodIp = getPodIP(destPodName); + LOGGER.info("Dest pod ip: {}", destPodIp); + // Send something! 
- var clientSocket = new Socket(IP, PORT); + var clientSocket = new Socket(destPodIp, PORT); var out = new PrintWriter(clientSocket.getOutputStream(), true); out.print("Hello!"); out.close(); - client.pods().delete(destPodDef); + KUBE_CLIENT.pods().delete(destPodDef); // TODO: Why does this wait not work? - client.resource(destPodDef).waitUntilCondition(pod -> !pod.getStatus().getPhase().equals("Terminating"), 1, TimeUnit.MINUTES); - client.close(); + KUBE_CLIENT.resource(destPodDef).waitUntilCondition(pod -> !pod.getStatus().getPhase().equals("Terminating"), 1, TimeUnit.MINUTES); + KUBE_CLIENT.close(); } } From a9bb1311f85ee461a77925ddd0a670c08fa24b4e Mon Sep 17 00:00:00 2001 From: jrhizor Date: Fri, 21 May 2021 07:02:40 -0700 Subject: [PATCH 20/34] remove unused file and update readme --- airbyte-workers/README.md | 15 +++++++++++++++ .../kube_queue_poc/command_fetcher_template.yaml | 1 - 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 airbyte-workers/README.md delete mode 100644 airbyte-workers/src/main/resources/kube_queue_poc/command_fetcher_template.yaml diff --git a/airbyte-workers/README.md b/airbyte-workers/README.md new file mode 100644 index 0000000000000..1fcb03e6920c1 --- /dev/null +++ b/airbyte-workers/README.md @@ -0,0 +1,15 @@ +# Kube Queueing POC + +To build sync worker: +``` +cd ~/code/airbyte +./gradlew :airbyte-workers:airbyteDocker +cd ~/code/airbyte/airbyte-workers/docker-shim-mvp/destination-listen-and-echo +docker build -t airbyte/destination-listen-and-echo:dev . +``` + +To run the sync worker +``` +cd ~/code/airbyte/airbyte-workers/src/main/resources/kube_queue_poc +kubectl apply -f kube-sync-workers.yaml +``` diff --git a/airbyte-workers/src/main/resources/kube_queue_poc/command_fetcher_template.yaml b/airbyte-workers/src/main/resources/kube_queue_poc/command_fetcher_template.yaml deleted file mode 100644 index ed97d539c095c..0000000000000 --- a/airbyte-workers/src/main/resources/kube_queue_poc/command_fetcher_template.yaml +++ /dev/null @@ -1 +0,0 @@ ---- From fc19affce1d93b897438d8a2dfedfe0f72996662 Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Fri, 21 May 2021 23:24:07 +0800 Subject: [PATCH 21/34] Dest pod loops back into worker pod. However, the right messages do not seem to be passing in. --- airbyte-workers/Dockerfile | 3 + .../destination-listen-and-echo/Dockerfile | 1 + .../listen-and-echo.sh | 1 - .../process/KubeProcessBuilderFactoryPOC.java | 75 +++++++++++++++---- .../kube-destination-sample-pod.yaml | 42 +++++++++++ .../kube_queue_poc/kube-sync-workers.yaml | 5 ++ .../kube_queue_poc/stdin_stdout_template.yaml | 2 +- .../kube_queue_poc/stdout_template.yaml | 2 +- 8 files changed, 112 insertions(+), 19 deletions(-) create mode 100644 airbyte-workers/src/main/resources/kube_queue_poc/kube-destination-sample-pod.yaml diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index bfb4eda8b18e2..8a0869166c6e6 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -2,6 +2,9 @@ FROM openjdk:14.0.2-slim WORKDIR /airbyte +# Just so we can run kubectl for now. 
+COPY --from=lachlanevenson/k8s-kubectl:v1.10.3 /usr/local/bin/kubectl /usr/local/bin/kubectl + COPY build/distributions/airbyte-workers*.tar airbyte-workers.tar RUN tar xf airbyte-workers.tar --strip-components=1 diff --git a/airbyte-workers/docker-shim-mvp/destination-listen-and-echo/Dockerfile b/airbyte-workers/docker-shim-mvp/destination-listen-and-echo/Dockerfile index f166afd62d0ba..201ee5bc02452 100644 --- a/airbyte-workers/docker-shim-mvp/destination-listen-and-echo/Dockerfile +++ b/airbyte-workers/docker-shim-mvp/destination-listen-and-echo/Dockerfile @@ -1,6 +1,7 @@ # Use the same java base to simulate a destination image. #FROM airbyte/integration-base-java:dev FROM debian:latest +ENV AIRBYTE_ENTRYPOINT="./listen-and-echo.sh" RUN apt-get update && apt-get install -y bash socat && rm -rf /var/lib/apt/lists/* diff --git a/airbyte-workers/docker-shim-mvp/destination-listen-and-echo/listen-and-echo.sh b/airbyte-workers/docker-shim-mvp/destination-listen-and-echo/listen-and-echo.sh index 5ae5c03c2aeeb..324a30a667f76 100755 --- a/airbyte-workers/docker-shim-mvp/destination-listen-and-echo/listen-and-echo.sh +++ b/airbyte-workers/docker-shim-mvp/destination-listen-and-echo/listen-and-echo.sh @@ -1,4 +1,3 @@ #!/usr/bin/env bash -echo "ip: ${DEST_IP}", "port: ${DEST_PORT}" socat -d -d -d TCP-LISTEN:"${DEST_PORT}",bind="${DEST_IP}" stdout diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java index 0fa299481ab7c..734622f890e0a 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java @@ -24,19 +24,28 @@ package io.airbyte.workers.process; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; import com.google.common.collect.Lists; import io.airbyte.commons.io.IOs; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.resources.MoreResources; import io.fabric8.kubernetes.api.model.Pod; import io.fabric8.kubernetes.api.model.PodList; import io.fabric8.kubernetes.client.DefaultKubernetesClient; import io.fabric8.kubernetes.client.KubernetesClient; import java.io.BufferedReader; +import java.io.ByteArrayInputStream; import java.io.IOException; +import java.io.InputStreamReader; import java.io.PrintWriter; import java.net.InetAddress; +import java.net.ServerSocket; import java.net.Socket; import java.nio.file.Path; import java.util.List; +import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import org.apache.commons.lang3.RandomStringUtils; @@ -48,7 +57,7 @@ public class KubeProcessBuilderFactoryPOC { private static final Logger LOGGER = LoggerFactory.getLogger(KubeProcessBuilderFactoryPOC.class); private static final KubernetesClient KUBE_CLIENT = new DefaultKubernetesClient(); - private static final int PORT = 9000; + private static final int PORT = 9001; // todo: this should really be cached private static String getCommandFromImage(String imageName) throws IOException { @@ -119,18 +128,16 @@ private static void saveJaredWork() { } private static String getPodIP(String podName) { - // TODO: Why does this not work? + // TODO: Why does directly searching for the pod not work? 
// LOGGER.info(destPod.getStatus().getPodIP()); // destPod = client.resource(destPod).get(); // LOGGER.info("Status: {}", destPod.getStatus()); // LOGGER.info("IP: {}", destPod.getStatus().getPodIP()); // IP = destPod.getStatus().getPodIP(); - // TODO: Assign labels to pods to narrow the search. + // TODO: We could assign labels to pods to narrow the search. PodList pods = KUBE_CLIENT.pods().inNamespace("default").list(); for (Pod p : pods.getItems()) { - LOGGER.info(p.getMetadata().getName()); - LOGGER.info(p.getStatus().getPodIP()); // Filter by pod and retrieve IP. if (p.getMetadata().getName().equals(podName)) { LOGGER.info("Found IP!"); @@ -141,6 +148,29 @@ private static String getPodIP(String podName) { return null; } + // TODO: It might be easier to do this using the same Socat pattern we use in the + // Dockerfile and be reading from a file. + private static void startListeningOnPort(int port) { + Executors.newSingleThreadExecutor().submit(() -> { + try (var serverSocket = new ServerSocket(port)) { + LOGGER.info("Created server and waiting for connection.."); + var socket = serverSocket.accept(); + LOGGER.info("Accepted connection!"); + var input = socket.getInputStream(); + BufferedReader reader = new BufferedReader(new InputStreamReader(input)); + while(!reader.ready()) + while (true) { + final var line = reader.readLine(); + if (line == null) break; + + LOGGER.info("Destination sent: {}", line); + } + } catch (IOException e) { + LOGGER.error("Error starting socket reader: ",e); + } + }); + } + private static void createIfNotExisting(String podName, Pod def) throws InterruptedException { LOGGER.info("Checking pod: {}", podName); var podSet = KUBE_CLIENT.pods().inNamespace("default").list().getItems().stream() @@ -148,32 +178,41 @@ private static void createIfNotExisting(String podName, Pod def) throws Interrup if (podSet.size() == 0) { LOGGER.info("Pod {} does not exist", podName); Pod destPod = KUBE_CLIENT.pods().create(def); - LOGGER.info("Created pod: {}, waiting for it to be ready", destPod); + LOGGER.info("Created pod: {}, waiting for it to be ready", destPod.getMetadata().getName()); KUBE_CLIENT.resource(destPod).waitUntilReady(1, TimeUnit.MINUTES); LOGGER.info("Pod {} ready", podName); } } - public static void main(String[] args) throws InterruptedException, IOException { + private static void runSampleKubeWorker() throws InterruptedException, IOException { String myIp = InetAddress.getLocalHost().getHostAddress(); LOGGER.info("Kube sync worker ip: {}", myIp); - var destPodName = "destination-listen-and-echo"; + var destPodName = "kube-destination-sample"; - // Load spec and create the pod. - var stream = KubeProcessBuilderFactoryPOC.class.getClassLoader().getResourceAsStream("kube_queue_poc/destination-listen-and-echo.yaml"); - var destPodDef = KUBE_CLIENT.pods().load(stream).get(); - LOGGER.info("Loaded spec: {}", destPodDef); + // Load spec and swap in worker ip. + var template = MoreResources.readResource("kube_queue_poc/kube-destination-sample-pod.yaml"); + var rendered = template.replaceAll("WORKER_IP", myIp); + var renderedStream = new ByteArrayInputStream(rendered.getBytes()); + var destPodDef = KUBE_CLIENT.pods().load(renderedStream).get(); + LOGGER.info("Loaded spec"); + + // TODO: 1) The container image needs to line up with the actual spec. 2) Why isn't this working? + //var containers = destPodDef.getSpec().getContainers(); + //getCommandFromImage(containers.get(0).getImage()); + + // Start a listening server for the Destination to connect to. 
+ startListeningOnPort(9001); createIfNotExisting(destPodName, destPodDef); String destPodIp = getPodIP(destPodName); LOGGER.info("Dest pod ip: {}", destPodIp); // Send something! - var clientSocket = new Socket(destPodIp, PORT); - var out = new PrintWriter(clientSocket.getOutputStream(), true); - out.print("Hello!"); - out.close(); + var socketToDestStdIo = new Socket(destPodIp, PORT); + var toDest = new PrintWriter(socketToDestStdIo.getOutputStream(), true); + toDest.print("Hello!"); + toDest.close(); KUBE_CLIENT.pods().delete(destPodDef); // TODO: Why does this wait not work? @@ -181,4 +220,8 @@ public static void main(String[] args) throws InterruptedException, IOException KUBE_CLIENT.close(); } + public static void main(String[] args) throws InterruptedException, IOException { + runSampleKubeWorker(); + } + } diff --git a/airbyte-workers/src/main/resources/kube_queue_poc/kube-destination-sample-pod.yaml b/airbyte-workers/src/main/resources/kube_queue_poc/kube-destination-sample-pod.yaml new file mode 100644 index 0000000000000..88e18f2baf4d3 --- /dev/null +++ b/airbyte-workers/src/main/resources/kube_queue_poc/kube-destination-sample-pod.yaml @@ -0,0 +1,42 @@ +apiVersion: v1 +kind: Pod +metadata: + name: kube-destination-sample +spec: + restartPolicy: Never + initContainers: + - name: init + image: busybox:1.28 + command: ["sh", "-c", "mkfifo /pipes/stdin && mkfifo /pipes/stdout"] + volumeMounts: + - name: airbyte-pipes + mountPath: /pipes + containers: + - name: destination + image: airbyte/destination-listen-and-echo:dev + imagePullPolicy: Always + command: ["sh", "-c", "cat /pipes/stdin | ./listen-and-echo.sh > /pipes/stdout"] + volumeMounts: + - name: airbyte-pipes + mountPath: /pipes + - name: socat-port-to-destination-in + image: alpine/socat:1.7.4.1-r1 + command: ["sh", "-c", "socat -d -d -d - TCP-L:9001 > /pipes/stdin"] + ports: + - containerPort: 9001 + volumeMounts: + - name: airbyte-pipes + mountPath: /pipes + - name: destination-out-to-socat-port + image: alpine/socat:1.7.4.1-r1 + command: [ + "sh", + "-c", + "cat /pipes/stdout | socat -d -d -d - TCP:10.104.0.57:9001", + ] + volumeMounts: + - name: airbyte-pipes + mountPath: /pipes + volumes: + - name: airbyte-pipes + emptyDir: {} diff --git a/airbyte-workers/src/main/resources/kube_queue_poc/kube-sync-workers.yaml b/airbyte-workers/src/main/resources/kube_queue_poc/kube-sync-workers.yaml index 297c33cce8788..672a308feafc4 100644 --- a/airbyte-workers/src/main/resources/kube_queue_poc/kube-sync-workers.yaml +++ b/airbyte-workers/src/main/resources/kube_queue_poc/kube-sync-workers.yaml @@ -8,3 +8,8 @@ spec: - name: kube-sync-worker-test image: airbyte/kube-sync-worker-test:dev imagePullPolicy: Always + ports: + - containerPort: 9000 + name: source-port + - containerPort: 9001 + name: dest-port diff --git a/airbyte-workers/src/main/resources/kube_queue_poc/stdin_stdout_template.yaml b/airbyte-workers/src/main/resources/kube_queue_poc/stdin_stdout_template.yaml index 94d8e179636af..555e2aee8e4d0 100644 --- a/airbyte-workers/src/main/resources/kube_queue_poc/stdin_stdout_template.yaml +++ b/airbyte-workers/src/main/resources/kube_queue_poc/stdin_stdout_template.yaml @@ -31,7 +31,7 @@ spec: fieldRef: fieldPath: status.podIP ports: - - containerPort: 9000 + - containerPort: 9001 volumeMounts: - name: airbyte-pipes mountPath: /pipes diff --git a/airbyte-workers/src/main/resources/kube_queue_poc/stdout_template.yaml b/airbyte-workers/src/main/resources/kube_queue_poc/stdout_template.yaml index 66800800a02e2..6f4b196996b92 100644 
--- a/airbyte-workers/src/main/resources/kube_queue_poc/stdout_template.yaml +++ b/airbyte-workers/src/main/resources/kube_queue_poc/stdout_template.yaml @@ -28,7 +28,7 @@ spec: "sh", "-c", "cat /pipes/stdout | socat -d -d -d - TCP:host.docker.internal:9000", - ] # todo: pass in the sync worker ip + ] env: - name: POD_IP valueFrom: From 618009992319b2a246ab290d0e9da6e9deea737c Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Fri, 21 May 2021 23:40:54 +0800 Subject: [PATCH 22/34] Switch back to worker ip. --- .../resources/kube_queue_poc/kube-destination-sample-pod.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-workers/src/main/resources/kube_queue_poc/kube-destination-sample-pod.yaml b/airbyte-workers/src/main/resources/kube_queue_poc/kube-destination-sample-pod.yaml index 88e18f2baf4d3..2d553d58e13f7 100644 --- a/airbyte-workers/src/main/resources/kube_queue_poc/kube-destination-sample-pod.yaml +++ b/airbyte-workers/src/main/resources/kube_queue_poc/kube-destination-sample-pod.yaml @@ -32,7 +32,7 @@ spec: command: [ "sh", "-c", - "cat /pipes/stdout | socat -d -d -d - TCP:10.104.0.57:9001", + "cat /pipes/stdout | socat -d -d -d - TCP:WORKER_IP:9001", ] volumeMounts: - name: airbyte-pipes From 4109ef748272e9734cc004d948ac0796dfb7a3de Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Sat, 22 May 2021 00:48:38 +0800 Subject: [PATCH 23/34] SWEET VICTORY!. --- .../process/KubeProcessBuilderFactoryPOC.java | 13 +++++++++---- .../kube_queue_poc/kube-destination-sample-pod.yaml | 9 ++++++--- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java index 734622f890e0a..af2a296162ab6 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java @@ -158,7 +158,7 @@ private static void startListeningOnPort(int port) { LOGGER.info("Accepted connection!"); var input = socket.getInputStream(); BufferedReader reader = new BufferedReader(new InputStreamReader(input)); - while(!reader.ready()) +// while(!reader.ready()) while (true) { final var line = reader.readLine(); if (line == null) break; @@ -198,8 +198,9 @@ private static void runSampleKubeWorker() throws InterruptedException, IOExcepti LOGGER.info("Loaded spec"); // TODO: 1) The container image needs to line up with the actual spec. 2) Why isn't this working? - //var containers = destPodDef.getSpec().getContainers(); - //getCommandFromImage(containers.get(0).getImage()); +// var containers = destPodDef.getSpec().getContainers(); +// System.out.println(containers.get(0)); +// getCommandFromImage(containers.get(0).getImage()); // Start a listening server for the Destination to connect to. startListeningOnPort(9001); @@ -211,7 +212,11 @@ private static void runSampleKubeWorker() throws InterruptedException, IOExcepti // Send something! 
var socketToDestStdIo = new Socket(destPodIp, PORT); var toDest = new PrintWriter(socketToDestStdIo.getOutputStream(), true); - toDest.print("Hello!"); + toDest.println("Hello!"); + toDest.println("a!"); + toDest.println("b!"); + toDest.println("c!"); + toDest.println("d!"); toDest.close(); KUBE_CLIENT.pods().delete(destPodDef); diff --git a/airbyte-workers/src/main/resources/kube_queue_poc/kube-destination-sample-pod.yaml b/airbyte-workers/src/main/resources/kube_queue_poc/kube-destination-sample-pod.yaml index 2d553d58e13f7..624956e789b6e 100644 --- a/airbyte-workers/src/main/resources/kube_queue_poc/kube-destination-sample-pod.yaml +++ b/airbyte-workers/src/main/resources/kube_queue_poc/kube-destination-sample-pod.yaml @@ -13,15 +13,15 @@ spec: mountPath: /pipes containers: - name: destination - image: airbyte/destination-listen-and-echo:dev + image: airbyte/np-dest:dev imagePullPolicy: Always - command: ["sh", "-c", "cat /pipes/stdin | ./listen-and-echo.sh > /pipes/stdout"] + command: ["sh", "-c", "cat /pipes/stdin | /tmp/run.sh > /pipes/stdout"] volumeMounts: - name: airbyte-pipes mountPath: /pipes - name: socat-port-to-destination-in image: alpine/socat:1.7.4.1-r1 - command: ["sh", "-c", "socat -d -d -d - TCP-L:9001 > /pipes/stdin"] + command: ["sh", "-c", "socat -d -d -d TCP-L:9001 STDOUT > /pipes/stdin"] ports: - containerPort: 9001 volumeMounts: @@ -37,6 +37,9 @@ spec: volumeMounts: - name: airbyte-pipes mountPath: /pipes + - name: stay-awake + image: alpine/socat:1.7.4.1-r1 + command: ["sh", "-c", "sleep 10000000"] volumes: - name: airbyte-pipes emptyDir: {} From b447fe8085d5a64e812ff033544addd10a0f5039 Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Mon, 24 May 2021 20:27:19 -0700 Subject: [PATCH 24/34] wrap kube pod in process (#3540) also clean up kubernetes deploys. 
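The patch below wraps a Kubernetes pod in a `java.lang.Process`, so the worker can drive remote source and destination containers through ordinary stdin/stdout streams (named pipes plus socat sidecars inside the pod). A hedged usage sketch of that wrapper, using the constructor signature and image names that appear in the diff; the stream wiring is simplified compared to the POC's `main()`, which copies lines on separate threads:

```
// Illustrative only: treating Kube pods as local processes, mirroring the POC main() below.
import io.fabric8.kubernetes.client.DefaultKubernetesClient;
import io.fabric8.kubernetes.client.KubernetesClient;

public class KubePodProcessUsageSketch {

  public static void main(String[] args) throws Exception {
    KubernetesClient client = new DefaultKubernetesClient();

    // Source pod exposes only stdout; destination pod also accepts stdin via its socat sidecar.
    Process src = new KubePodProcess(client, "src", "np_source:dev", 9002, false);
    Process dest = new KubePodProcess(client, "dest", "np_dest:dev", 9003, true);

    // Pipe the source's stdout (relayed out of the pod over TCP) into the destination's stdin.
    src.getInputStream().transferTo(dest.getOutputStream());
    dest.getOutputStream().close();

    src.waitFor();
    dest.waitFor();
    client.close();
  }
}
```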
--- airbyte-workers/Dockerfile | 2 +- .../workers/process/KubePodProcess.java | 230 ++++++++++++++++++ .../process/KubeProcessBuilderFactoryPOC.java | 128 +++------- .../destination-listen-and-echo.yaml | 17 -- .../kube-destination-sample-pod.yaml | 45 ---- .../kube_queue_poc/kube-sync-workers.yaml | 15 -- .../kube_queue_poc/launch/airbyte-worker.yaml | 15 ++ .../default_service_account_perms.yaml | 0 .../kube_queue_poc/stdin_stdout_template.yaml | 43 ---- .../kube_queue_poc/stdout_template.yaml | 47 ---- 10 files changed, 285 insertions(+), 257 deletions(-) create mode 100644 airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java delete mode 100644 airbyte-workers/src/main/resources/kube_queue_poc/destination-listen-and-echo.yaml delete mode 100644 airbyte-workers/src/main/resources/kube_queue_poc/kube-destination-sample-pod.yaml delete mode 100644 airbyte-workers/src/main/resources/kube_queue_poc/kube-sync-workers.yaml create mode 100644 airbyte-workers/src/main/resources/kube_queue_poc/launch/airbyte-worker.yaml rename airbyte-workers/src/main/resources/kube_queue_poc/{ => launch}/default_service_account_perms.yaml (100%) delete mode 100644 airbyte-workers/src/main/resources/kube_queue_poc/stdin_stdout_template.yaml delete mode 100644 airbyte-workers/src/main/resources/kube_queue_poc/stdout_template.yaml diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index 8a0869166c6e6..07450a191d7fe 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -12,4 +12,4 @@ RUN tar xf airbyte-workers.tar --strip-components=1 ENTRYPOINT ["./bin/airbyte-workers"] LABEL io.airbyte.version=0.1.0 -LABEL io.airbyte.name=airbyte/kube-sync-worker-test +LABEL io.airbyte.name=airbyte/worker diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java new file mode 100644 index 0000000000000..e842b74b66f86 --- /dev/null +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java @@ -0,0 +1,230 @@ +/* + * MIT License + * + * Copyright (c) 2020 Airbyte + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +package io.airbyte.workers.process; + +import com.google.common.base.Preconditions; +import io.fabric8.kubernetes.api.model.Container; +import io.fabric8.kubernetes.api.model.ContainerBuilder; +import io.fabric8.kubernetes.api.model.DeletionPropagation; +import io.fabric8.kubernetes.api.model.Pod; +import io.fabric8.kubernetes.api.model.PodBuilder; +import io.fabric8.kubernetes.api.model.Volume; +import io.fabric8.kubernetes.api.model.VolumeBuilder; +import io.fabric8.kubernetes.api.model.VolumeMount; +import io.fabric8.kubernetes.api.model.VolumeMountBuilder; +import io.fabric8.kubernetes.client.KubernetesClient; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.InetAddress; +import java.net.ServerSocket; +import java.net.Socket; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import org.apache.commons.io.output.NullOutputStream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class KubePodProcess extends Process { + + private static final Logger LOGGER = LoggerFactory.getLogger(KubePodProcess.class); + + private static final int STDIN_REMOTE_PORT = 9001; + + private final KubernetesClient client; + private final Pod podDefinition; + + private final OutputStream stdin; + private InputStream stdout; + private final ServerSocket stdoutServerSocket; + private final ExecutorService executorService; + + public KubePodProcess(KubernetesClient client, String podName, String image, int stdoutLocalPort, boolean usesStdin) + throws IOException, InterruptedException { + this.client = client; + + // allow reading stdout from pod + LOGGER.info("Creating socket server..."); + this.stdoutServerSocket = new ServerSocket(stdoutLocalPort); + + executorService = Executors.newSingleThreadExecutor(); + executorService.submit(() -> { + try { + LOGGER.info("Creating socket from server..."); + var socket = stdoutServerSocket.accept(); // blocks until connected + LOGGER.info("Setting stdout..."); + this.stdout = socket.getInputStream(); + } catch (IOException e) { + e.printStackTrace(); // todo: propagate exception / join at the end of constructor + } + }); + + // create pod + String entrypoint = KubeProcessBuilderFactoryPOC.getCommandFromImage(image); + + Volume volume = new VolumeBuilder() + .withName("airbyte-pipes") + .withNewEmptyDir() + .endEmptyDir() + .build(); + + VolumeMount volumeMount = new VolumeMountBuilder() + .withName("airbyte-pipes") + .withMountPath("/pipes") + .build(); + + Container initContainer = new ContainerBuilder() + .withName("init") + .withImage("busybox:1.28") + .withCommand("sh", "-c", usesStdin ? "mkfifo /pipes/stdin && mkfifo /pipes/stdout" : "mkfifo /pipes/stdout") + .withVolumeMounts(volumeMount) + .build(); + + Container main = new ContainerBuilder() + .withName("main") + .withImage(image) + .withCommand("sh", "-c", usesStdin ? 
"cat /pipes/stdin | " + entrypoint + " > /pipes/stdout" : entrypoint + " > /pipes/stdout") + .withVolumeMounts(volumeMount) + .build(); + + Container remoteStdin = new ContainerBuilder() + .withName("remote-stdin") + .withImage("alpine/socat:1.7.4.1-r1") + .withCommand("sh", "-c", "socat -d -d -d TCP-L:9001 STDOUT > /pipes/stdin") + .withVolumeMounts(volumeMount) + .build(); + + Container relayStdout = new ContainerBuilder() + .withName("relay-stdout") + .withImage("alpine/socat:1.7.4.1-r1") + .withCommand("sh", "-c", "cat /pipes/stdout | socat -d -d -d - TCP:" + InetAddress.getLocalHost().getHostAddress() + ":" + stdoutLocalPort) + .withVolumeMounts(volumeMount) + .build(); + + List containers = usesStdin ? List.of(main, remoteStdin, relayStdout) : List.of(main, relayStdout); + + Pod pod = new PodBuilder() + .withApiVersion("v1") + .withNewMetadata() + .withName(podName) + .endMetadata() + .withNewSpec() + .withRestartPolicy("Never") + .withInitContainers(initContainer) + .withContainers(containers) + .withVolumes(volume) + .endSpec() + .build(); + + LOGGER.info("Creating pod..."); + this.podDefinition = client.pods().inNamespace("default").createOrReplace(pod); + + LOGGER.info("Waiting until pod is ready..."); + client.resource(podDefinition).waitUntilReady(5, TimeUnit.MINUTES); + + // allow writing stdin to pod + LOGGER.info("Reading pod IP..."); + var podIp = KubeProcessBuilderFactoryPOC.getPodIP(podName); + LOGGER.info("Pod IP: {}", podIp); + + if (usesStdin) { + LOGGER.info("Creating stdin socket..."); + var socketToDestStdIo = new Socket(podIp, STDIN_REMOTE_PORT); + this.stdin = socketToDestStdIo.getOutputStream(); + } else { + LOGGER.info("Using null stdin output stream..."); + this.stdin = NullOutputStream.NULL_OUTPUT_STREAM; + } + } + + @Override + public OutputStream getOutputStream() { + return this.stdin; + } + + @Override + public InputStream getInputStream() { + return this.stdout; + } + + @Override + public InputStream getErrorStream() { + // there is no error stream equivalent for Kube-based processes so we use a null stream here + return InputStream.nullInputStream(); + } + + @Override + public int waitFor() throws InterruptedException { + client.resource(podDefinition).waitUntilCondition(this::isTerminal, 10, TimeUnit.DAYS); + return exitValue(); + } + + private boolean isTerminal(Pod pod) { + if (pod.getStatus() != null) { + return pod.getStatus() + .getContainerStatuses() + .stream() + .anyMatch(e -> e.getState() != null && e.getState().getTerminated() != null); + } else { + return false; + } + } + + private int getReturnCode(Pod pod) { + Pod refreshedPod = client.pods().withName(pod.getMetadata().getName()).get(); // todo: use more robust version here + Preconditions.checkArgument(isTerminal(refreshedPod)); + + return refreshedPod.getStatus().getContainerStatuses() + .stream() + .filter(containerStatus -> containerStatus.getState() != null && containerStatus.getState().getTerminated() != null) + .map(containerStatus -> { + int statusCode = containerStatus.getState().getTerminated().getExitCode(); + LOGGER.info("Termination status for container " + containerStatus.getName() + " is " + statusCode); + return statusCode; + }) + .reduce(Integer::sum) + .orElseThrow(); + } + + @Override + public int exitValue() { + return getReturnCode(podDefinition); + } + + @Override + public void destroy() { + try { + stdoutServerSocket.close(); + } catch (IOException e) { + throw new RuntimeException(e); + } finally { + executorService.shutdown(); + 
client.resource(podDefinition).withPropagationPolicy(DeletionPropagation.FOREGROUND).delete(); + } + } + +} diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java index af2a296162ab6..68842b69b91f6 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java @@ -24,30 +24,20 @@ package io.airbyte.workers.process; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; import com.google.common.collect.Lists; import io.airbyte.commons.io.IOs; -import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.resources.MoreResources; import io.fabric8.kubernetes.api.model.Pod; import io.fabric8.kubernetes.api.model.PodList; import io.fabric8.kubernetes.client.DefaultKubernetesClient; import io.fabric8.kubernetes.client.KubernetesClient; import java.io.BufferedReader; -import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.PrintWriter; -import java.net.InetAddress; -import java.net.ServerSocket; -import java.net.Socket; import java.nio.file.Path; import java.util.List; +import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; -import java.util.stream.Collectors; import org.apache.commons.lang3.RandomStringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -57,10 +47,9 @@ public class KubeProcessBuilderFactoryPOC { private static final Logger LOGGER = LoggerFactory.getLogger(KubeProcessBuilderFactoryPOC.class); private static final KubernetesClient KUBE_CLIENT = new DefaultKubernetesClient(); - private static final int PORT = 9001; // todo: this should really be cached - private static String getCommandFromImage(String imageName) throws IOException { + public static String getCommandFromImage(String imageName) throws IOException { final String suffix = RandomStringUtils.randomAlphabetic(5).toLowerCase(); final String podName = "airbyte-command-fetcher-" + suffix; @@ -70,7 +59,7 @@ private static String getCommandFromImage(String imageName) throws IOException { "kubectl", "run", "--generator=run-pod/v1", - "--rm", + // "--rm", "-i", "--pod-running-timeout=24h", "--image=" + imageName, @@ -102,8 +91,6 @@ private static String getCommandFromImage(String imageName) throws IOException { } } - private static void createPodAndWaitTillReady(String imageId) {} - private static void saveJaredWork() { try { // todo: test this with args that are used by the process @@ -127,7 +114,7 @@ private static void saveJaredWork() { } } - private static String getPodIP(String podName) { + public static String getPodIP(String podName) { // TODO: Why does directly searching for the pod not work? // LOGGER.info(destPod.getStatus().getPodIP()); // destPod = client.resource(destPod).get(); @@ -148,85 +135,48 @@ private static String getPodIP(String podName) { return null; } - // TODO: It might be easier to do this using the same Socat pattern we use in the - // Dockerfile and be reading from a file. 
- private static void startListeningOnPort(int port) { - Executors.newSingleThreadExecutor().submit(() -> { - try (var serverSocket = new ServerSocket(port)) { - LOGGER.info("Created server and waiting for connection.."); - var socket = serverSocket.accept(); - LOGGER.info("Accepted connection!"); - var input = socket.getInputStream(); - BufferedReader reader = new BufferedReader(new InputStreamReader(input)); -// while(!reader.ready()) - while (true) { - final var line = reader.readLine(); - if (line == null) break; - - LOGGER.info("Destination sent: {}", line); + public static void main(String[] args) throws InterruptedException, IOException { + LOGGER.info("Launching source process..."); + Process src = new KubePodProcess(KUBE_CLIENT, "src", "np_source:dev", 9002, false); + + LOGGER.info("Launching destination process..."); + Process dest = new KubePodProcess(KUBE_CLIENT, "dest", "np_dest:dev", 9003, true); + + LOGGER.info("Launching background thread to read destination lines..."); + ExecutorService executor = Executors.newSingleThreadExecutor(); + executor.submit(() -> { + BufferedReader reader = new BufferedReader(new InputStreamReader(dest.getInputStream())); + + while (true) { + try { + String line; + if ((line = reader.readLine()) != null) { + LOGGER.info("Destination sent: {}", line); + } + } catch (IOException e) { + e.printStackTrace(); } - } catch (IOException e) { - LOGGER.error("Error starting socket reader: ",e); } }); - } - private static void createIfNotExisting(String podName, Pod def) throws InterruptedException { - LOGGER.info("Checking pod: {}", podName); - var podSet = KUBE_CLIENT.pods().inNamespace("default").list().getItems().stream() - .filter(pod -> pod.getMetadata().getName().equals(podName)).collect(Collectors.toSet()); - if (podSet.size() == 0) { - LOGGER.info("Pod {} does not exist", podName); - Pod destPod = KUBE_CLIENT.pods().create(def); - LOGGER.info("Created pod: {}, waiting for it to be ready", destPod.getMetadata().getName()); - KUBE_CLIENT.resource(destPod).waitUntilReady(1, TimeUnit.MINUTES); - LOGGER.info("Pod {} ready", podName); + LOGGER.info("Copying source stdout to destination stdin..."); + + BufferedReader reader = IOs.newBufferedReader(src.getInputStream()); + PrintWriter writer = new PrintWriter(dest.getOutputStream(), true); + + String line; + while ((line = reader.readLine()) != null) { + writer.println(line); } - } + writer.close(); - private static void runSampleKubeWorker() throws InterruptedException, IOException { - String myIp = InetAddress.getLocalHost().getHostAddress(); - LOGGER.info("Kube sync worker ip: {}", myIp); - - var destPodName = "kube-destination-sample"; - - // Load spec and swap in worker ip. - var template = MoreResources.readResource("kube_queue_poc/kube-destination-sample-pod.yaml"); - var rendered = template.replaceAll("WORKER_IP", myIp); - var renderedStream = new ByteArrayInputStream(rendered.getBytes()); - var destPodDef = KUBE_CLIENT.pods().load(renderedStream).get(); - LOGGER.info("Loaded spec"); - - // TODO: 1) The container image needs to line up with the actual spec. 2) Why isn't this working? -// var containers = destPodDef.getSpec().getContainers(); -// System.out.println(containers.get(0)); -// getCommandFromImage(containers.get(0).getImage()); - - // Start a listening server for the Destination to connect to. - startListeningOnPort(9001); - - createIfNotExisting(destPodName, destPodDef); - String destPodIp = getPodIP(destPodName); - LOGGER.info("Dest pod ip: {}", destPodIp); - - // Send something! 
- var socketToDestStdIo = new Socket(destPodIp, PORT); - var toDest = new PrintWriter(socketToDestStdIo.getOutputStream(), true); - toDest.println("Hello!"); - toDest.println("a!"); - toDest.println("b!"); - toDest.println("c!"); - toDest.println("d!"); - toDest.close(); - - KUBE_CLIENT.pods().delete(destPodDef); - // TODO: Why does this wait not work? - KUBE_CLIENT.resource(destPodDef).waitUntilCondition(pod -> !pod.getStatus().getPhase().equals("Terminating"), 1, TimeUnit.MINUTES); - KUBE_CLIENT.close(); - } + LOGGER.info("Waiting for source..."); + src.waitFor(); + LOGGER.info("Waiting for destination..."); + dest.waitFor(); + LOGGER.info("Done!"); - public static void main(String[] args) throws InterruptedException, IOException { - runSampleKubeWorker(); + System.exit(0); // todo: handle executors so we don't need to kill the JVM } } diff --git a/airbyte-workers/src/main/resources/kube_queue_poc/destination-listen-and-echo.yaml b/airbyte-workers/src/main/resources/kube_queue_poc/destination-listen-and-echo.yaml deleted file mode 100644 index d8610aa381684..0000000000000 --- a/airbyte-workers/src/main/resources/kube_queue_poc/destination-listen-and-echo.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: v1 -kind: Pod -metadata: - name: destination-listen-and-echo - namespace: default -spec: - restartPolicy: Never - containers: - - name: destination-listen-and-echo - image: airbyte/destination-listen-and-echo:dev - ports: - - containerPort: 9000 - env: - - name: DEST_PORT - value: "9000" - - name: DEST_IP - value: "0.0.0.0" diff --git a/airbyte-workers/src/main/resources/kube_queue_poc/kube-destination-sample-pod.yaml b/airbyte-workers/src/main/resources/kube_queue_poc/kube-destination-sample-pod.yaml deleted file mode 100644 index 624956e789b6e..0000000000000 --- a/airbyte-workers/src/main/resources/kube_queue_poc/kube-destination-sample-pod.yaml +++ /dev/null @@ -1,45 +0,0 @@ -apiVersion: v1 -kind: Pod -metadata: - name: kube-destination-sample -spec: - restartPolicy: Never - initContainers: - - name: init - image: busybox:1.28 - command: ["sh", "-c", "mkfifo /pipes/stdin && mkfifo /pipes/stdout"] - volumeMounts: - - name: airbyte-pipes - mountPath: /pipes - containers: - - name: destination - image: airbyte/np-dest:dev - imagePullPolicy: Always - command: ["sh", "-c", "cat /pipes/stdin | /tmp/run.sh > /pipes/stdout"] - volumeMounts: - - name: airbyte-pipes - mountPath: /pipes - - name: socat-port-to-destination-in - image: alpine/socat:1.7.4.1-r1 - command: ["sh", "-c", "socat -d -d -d TCP-L:9001 STDOUT > /pipes/stdin"] - ports: - - containerPort: 9001 - volumeMounts: - - name: airbyte-pipes - mountPath: /pipes - - name: destination-out-to-socat-port - image: alpine/socat:1.7.4.1-r1 - command: [ - "sh", - "-c", - "cat /pipes/stdout | socat -d -d -d - TCP:WORKER_IP:9001", - ] - volumeMounts: - - name: airbyte-pipes - mountPath: /pipes - - name: stay-awake - image: alpine/socat:1.7.4.1-r1 - command: ["sh", "-c", "sleep 10000000"] - volumes: - - name: airbyte-pipes - emptyDir: {} diff --git a/airbyte-workers/src/main/resources/kube_queue_poc/kube-sync-workers.yaml b/airbyte-workers/src/main/resources/kube_queue_poc/kube-sync-workers.yaml deleted file mode 100644 index 672a308feafc4..0000000000000 --- a/airbyte-workers/src/main/resources/kube_queue_poc/kube-sync-workers.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: v1 -kind: Pod -metadata: - name: kube-sync-worker-test -spec: - restartPolicy: Never - containers: - - name: kube-sync-worker-test - image: airbyte/kube-sync-worker-test:dev - 
imagePullPolicy: Always - ports: - - containerPort: 9000 - name: source-port - - containerPort: 9001 - name: dest-port diff --git a/airbyte-workers/src/main/resources/kube_queue_poc/launch/airbyte-worker.yaml b/airbyte-workers/src/main/resources/kube_queue_poc/launch/airbyte-worker.yaml new file mode 100644 index 0000000000000..d7a8c5268dd25 --- /dev/null +++ b/airbyte-workers/src/main/resources/kube_queue_poc/launch/airbyte-worker.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Pod +metadata: + name: sync-worker +spec: + restartPolicy: Never + containers: + - name: sync-worker + image: airbyte/worker:dev + # imagePullPolicy: Always -> prevents local images from working + ports: + - containerPort: 9002 + name: source-port + - containerPort: 9003 + name: dest-port diff --git a/airbyte-workers/src/main/resources/kube_queue_poc/default_service_account_perms.yaml b/airbyte-workers/src/main/resources/kube_queue_poc/launch/default_service_account_perms.yaml similarity index 100% rename from airbyte-workers/src/main/resources/kube_queue_poc/default_service_account_perms.yaml rename to airbyte-workers/src/main/resources/kube_queue_poc/launch/default_service_account_perms.yaml diff --git a/airbyte-workers/src/main/resources/kube_queue_poc/stdin_stdout_template.yaml b/airbyte-workers/src/main/resources/kube_queue_poc/stdin_stdout_template.yaml deleted file mode 100644 index 555e2aee8e4d0..0000000000000 --- a/airbyte-workers/src/main/resources/kube_queue_poc/stdin_stdout_template.yaml +++ /dev/null @@ -1,43 +0,0 @@ -apiVersion: v1 -kind: Pod -metadata: - name: airbyte-worker-JOBID-ATTEMPTID-SUFFIX -spec: - restartPolicy: Never - initContainers: - - name: init - image: busybox:1.28 - command: ["sh", "-c", "mkfifo /pipes/stdin && mkfifo /pipes/stdout"] - volumeMounts: - - name: airbyte-pipes - mountPath: /pipes - containers: - - name: worker - image: IMAGE - workingDir: WORKDIR - command: ["sh", "-c", "cat /pipes/stdin | COMMAND > /pipes/stdout"] - args: ARGS - volumeMounts: - # - name: airbyte-volume-workspace - # mountPath: /workspace - - name: airbyte-pipes - mountPath: /pipes - - name: socat - image: alpine/socat:1.7.4.1-r1 - command: ["sh", "-c", "socat -d -d -d - TCP-L:9001 > /pipes/stdin"] - env: - - name: POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - ports: - - containerPort: 9001 - volumeMounts: - - name: airbyte-pipes - mountPath: /pipes - volumes: - # - name: airbyte-volume-workspace - # persistentVolumeClaim: - # claimName: airbyte-volume-workspace - - name: airbyte-pipes - emptyDir: {} diff --git a/airbyte-workers/src/main/resources/kube_queue_poc/stdout_template.yaml b/airbyte-workers/src/main/resources/kube_queue_poc/stdout_template.yaml deleted file mode 100644 index 6f4b196996b92..0000000000000 --- a/airbyte-workers/src/main/resources/kube_queue_poc/stdout_template.yaml +++ /dev/null @@ -1,47 +0,0 @@ -apiVersion: v1 -kind: Pod -metadata: - name: airbyte-worker-JOBID-ATTEMPTID-SUFFIX -spec: - restartPolicy: Never - initContainers: - - name: init - image: busybox:1.28 - command: ["sh", "-c", "mkfifo /pipes/stdin && mkfifo /pipes/stdout"] - volumeMounts: - - name: airbyte-pipes - mountPath: /pipes - containers: - - name: worker - image: IMAGE - workingDir: WORKDIR - command: ["sh", "-c", "COMMAND > /pipes/stdout"] - args: ARGS - volumeMounts: - # - name: airbyte-volume-workspace - # mountPath: /workspace - - name: airbyte-pipes - mountPath: /pipes - - name: socat - image: alpine/socat:1.7.4.1-r1 - command: [ - "sh", - "-c", - "cat /pipes/stdout | socat -d -d -d - 
TCP:host.docker.internal:9000", - ] - env: - - name: POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - ports: - - containerPort: 9000 - volumeMounts: - - name: airbyte-pipes - mountPath: /pipes - volumes: - # - name: airbyte-volume-workspace - # persistentVolumeClaim: - # claimName: airbyte-volume-workspace - - name: airbyte-pipes - emptyDir: {} From 5517a10bb7030cf1b57bb4c77b136448de2f8dc1 Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Tue, 25 May 2021 16:20:29 +0800 Subject: [PATCH 25/34] Checkpoint: create getCommandFromImage test. --- .../workers/process/KubePodProcess.java | 1 + .../process/KubeProcessBuilderFactoryPOC.java | 3 +- .../KubeProcessBuilderFactoryPOCTest.java | 50 +++++++++++++++++++ .../test/resources/dockerfile-with-env-var | 3 ++ 4 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 airbyte-workers/src/test/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOCTest.java create mode 100644 airbyte-workers/src/test/resources/dockerfile-with-env-var diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java index e842b74b66f86..74a7a2b13a573 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java @@ -85,6 +85,7 @@ public KubePodProcess(KubernetesClient client, String podName, String image, int // create pod String entrypoint = KubeProcessBuilderFactoryPOC.getCommandFromImage(image); + LOGGER.info("Found entrypoint: {}", entrypoint); Volume volume = new VolumeBuilder() .withName("airbyte-pipes") diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java index 68842b69b91f6..05c56d2c76198 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java @@ -32,6 +32,7 @@ import io.fabric8.kubernetes.client.KubernetesClient; import java.io.BufferedReader; import java.io.IOException; +import java.io.InputStream; import java.io.InputStreamReader; import java.io.PrintWriter; import java.nio.file.Path; @@ -58,7 +59,7 @@ public static String getCommandFromImage(String imageName) throws IOException { Lists.newArrayList( "kubectl", "run", - "--generator=run-pod/v1", +// "--generator=run-pod/v1", // "--rm", "-i", "--pod-running-timeout=24h", diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOCTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOCTest.java new file mode 100644 index 0000000000000..071f74c317999 --- /dev/null +++ b/airbyte-workers/src/test/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOCTest.java @@ -0,0 +1,50 @@ +package io.airbyte.workers.process; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import java.io.IOException; +import java.util.Map; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.images.builder.ImageFromDockerfile; + +public class KubeProcessBuilderFactoryPOCTest { + private static final String 
ENTRYPOINT = "/tmp/run.sh"; + private static final String TEST_IMAGE_NAME = "np_dest:dev"; + + @BeforeAll + public static void setup() { + // TODO(Davin): Why does building the container ahead doesn't work? +// new GenericContainer( +// new ImageFromDockerfile(TEST_IMAGE_NAME, false) +// .withDockerfileFromBuilder(builder -> { +// builder +// .from("debian") +// .env(Map.of("AIRBYTE_ENTRYPOINT", ENTRYPOINT)) +// .entryPoint(ENTRYPOINT) +// .build();})).withEnv("AIRBYTE_ENTRYPOINT", ENTRYPOINT); + } + + @Test + @DisplayName("Should error if image does not have the right env var set.") + public void testGetCommandFromImageNoCommand() { + assertThrows(RuntimeException.class, () -> KubeProcessBuilderFactoryPOC.getCommandFromImage("hello-world")); + } + + @Test + @DisplayName("Should error if image does not exists.") + public void testGetCommandFromImageBadImage() { + assertThrows(RuntimeException.class, () -> KubeProcessBuilderFactoryPOC.getCommandFromImage("bad_missing_image")); + } + + @Test + @DisplayName("Should retrieve the right command if image has the right env var set.") + public void testGetCommandFromImageCommandPresent() throws IOException { + var command = KubeProcessBuilderFactoryPOC.getCommandFromImage(TEST_IMAGE_NAME); + assertEquals(ENTRYPOINT, command); + } + +} diff --git a/airbyte-workers/src/test/resources/dockerfile-with-env-var b/airbyte-workers/src/test/resources/dockerfile-with-env-var new file mode 100644 index 0000000000000..56a00b3a4adcb --- /dev/null +++ b/airbyte-workers/src/test/resources/dockerfile-with-env-var @@ -0,0 +1,3 @@ +FROM hello-world + +ENV AIRBYTE_ENTRYPOINT="/tmp/run.sh" From fb41ce1eda5e22b78db7dc9e4af6dc6d8d1e7f44 Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Tue, 25 May 2021 17:06:43 +0800 Subject: [PATCH 26/34] Update test containers. --- airbyte-workers/build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-workers/build.gradle b/airbyte-workers/build.gradle index 6cebee9f80d21..ac6e009a3d8b0 100644 --- a/airbyte-workers/build.gradle +++ b/airbyte-workers/build.gradle @@ -30,7 +30,7 @@ dependencies { implementation project(':airbyte-protocol:models') testImplementation 'org.mockito:mockito-inline:2.13.0' - testImplementation 'org.testcontainers:testcontainers:1.14.3' + testImplementation 'org.testcontainers:testcontainers:1.15.3' testImplementation 'org.testcontainers:postgresql:1.15.1' testImplementation 'org.postgresql:postgresql:42.2.18' } From 6dd0c4a2653695917321b80973a08d3e3160eb4c Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Tue, 25 May 2021 17:46:31 +0800 Subject: [PATCH 27/34] Use fabric api for getting entrypoint. Add tests to make sure we are getting the right entrypoint. 
--- .../workers/process/KubePodProcess.java | 45 ++++++++++- .../process/KubeProcessBuilderFactoryPOC.java | 44 ----------- .../workers/process/KubePodProcessTest.java | 75 +++++++++++++++++++ .../KubeProcessBuilderFactoryPOCTest.java | 50 ------------- 4 files changed, 119 insertions(+), 95 deletions(-) create mode 100644 airbyte-workers/src/test/java/io/airbyte/workers/process/KubePodProcessTest.java delete mode 100644 airbyte-workers/src/test/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOCTest.java diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java index 74a7a2b13a573..cbd358c0067e9 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java @@ -34,6 +34,7 @@ import io.fabric8.kubernetes.api.model.VolumeBuilder; import io.fabric8.kubernetes.api.model.VolumeMount; import io.fabric8.kubernetes.api.model.VolumeMountBuilder; +import io.fabric8.kubernetes.client.DefaultKubernetesClient; import io.fabric8.kubernetes.client.KubernetesClient; import java.io.IOException; import java.io.InputStream; @@ -46,6 +47,7 @@ import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import org.apache.commons.io.output.NullOutputStream; +import org.apache.commons.lang3.RandomStringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -63,6 +65,47 @@ public class KubePodProcess extends Process { private final ServerSocket stdoutServerSocket; private final ExecutorService executorService; + // TODO(Davin): Cache this result. + public static String getCommandFromImage(KubernetesClient client, String imageName) throws IOException, InterruptedException { + final String suffix = RandomStringUtils.randomAlphabetic(5).toLowerCase(); + + final String podName = "airbyte-command-fetcher-" + suffix; + + Container commandFetcher = new ContainerBuilder() + .withName("airbyte-command-fetcher") + .withImage(imageName) + .withCommand("sh", "-c", "echo \"AIRBYTE_ENTRYPOINT=$AIRBYTE_ENTRYPOINT\"") + .build(); + + Pod pod = new PodBuilder() + .withApiVersion("v1") + .withNewMetadata() + .withName(podName) + .endMetadata() + .withNewSpec() + .withRestartPolicy("Never") + .withContainers(commandFetcher) + .endSpec() + .build(); + LOGGER.info("Creating pod..."); + Pod podDefinition = client.pods().inNamespace("default").createOrReplace(pod); + LOGGER.info("Waiting until command fetcher pod completes..."); + client.resource(podDefinition).waitUntilCondition(p -> p.getStatus().getPhase().equals("Succeeded"), 20, TimeUnit.SECONDS); + + var logs = client.pods().inNamespace("default").withName(podName).getLog(); + if (!logs.contains("AIRBYTE_ENTRYPOINT")) { + // this should not happen + throw new RuntimeException("Unable to read AIRBYTE_ENTRYPOINT from the image. Make sure this environment variable is set in the Dockerfile!"); + } + + var envVal = logs.split("=")[1].strip(); + if (envVal.isEmpty()) { + throw new RuntimeException( + "Unable to read AIRBYTE_ENTRYPOINT from the image. 
Make sure this environment variable is set in the Dockerfile!"); + } + return envVal; + } + public KubePodProcess(KubernetesClient client, String podName, String image, int stdoutLocalPort, boolean usesStdin) throws IOException, InterruptedException { this.client = client; @@ -84,7 +127,7 @@ public KubePodProcess(KubernetesClient client, String podName, String image, int }); // create pod - String entrypoint = KubeProcessBuilderFactoryPOC.getCommandFromImage(image); + String entrypoint = getCommandFromImage(client, image); LOGGER.info("Found entrypoint: {}", entrypoint); Volume volume = new VolumeBuilder() diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java index 05c56d2c76198..5ef1fb9b88dd1 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java @@ -32,7 +32,6 @@ import io.fabric8.kubernetes.client.KubernetesClient; import java.io.BufferedReader; import java.io.IOException; -import java.io.InputStream; import java.io.InputStreamReader; import java.io.PrintWriter; import java.nio.file.Path; @@ -49,49 +48,6 @@ public class KubeProcessBuilderFactoryPOC { private static final KubernetesClient KUBE_CLIENT = new DefaultKubernetesClient(); - // todo: this should really be cached - public static String getCommandFromImage(String imageName) throws IOException { - final String suffix = RandomStringUtils.randomAlphabetic(5).toLowerCase(); - - final String podName = "airbyte-command-fetcher-" + suffix; - - final List cmd = - Lists.newArrayList( - "kubectl", - "run", -// "--generator=run-pod/v1", - // "--rm", - "-i", - "--pod-running-timeout=24h", - "--image=" + imageName, - "--command=true", - "--restart=Never", - podName, - "--", - "sh", - "-c", - "echo \"AIRBYTE_ENTRYPOINT=$AIRBYTE_ENTRYPOINT\""); - - Process start = new ProcessBuilder(cmd).start(); - - try (BufferedReader reader = IOs.newBufferedReader(start.getInputStream())) { - String line; - while ((line = reader.readLine()) != null && !line.contains("AIRBYTE_ENTRYPOINT")); - - if (line == null || !line.contains("AIRBYTE_ENTRYPOINT")) { - throw new RuntimeException("Unable to read AIRBYTE_ENTRYPOINT from the image. Make sure this environment variable is set in the Dockerfile!"); - } else { - String[] splits = line.split("=", 2); - if (splits.length == 1) { - throw new RuntimeException( - "Unable to read AIRBYTE_ENTRYPOINT from the image. 
Make sure this environment variable is set in the Dockerfile!"); - } else { - return splits[1]; - } - } - } - } - private static void saveJaredWork() { try { // todo: test this with args that are used by the process diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/process/KubePodProcessTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/process/KubePodProcessTest.java new file mode 100644 index 0000000000000..c4c97991f5ee5 --- /dev/null +++ b/airbyte-workers/src/test/java/io/airbyte/workers/process/KubePodProcessTest.java @@ -0,0 +1,75 @@ +/* + * MIT License + * + * Copyright (c) 2020 Airbyte + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package io.airbyte.workers.process; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import io.fabric8.kubernetes.client.DefaultKubernetesClient; +import io.fabric8.kubernetes.client.KubernetesClient; +import java.io.IOException; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +public class KubePodProcessTest { + + private static final KubernetesClient CLIENT = new DefaultKubernetesClient(); + private static final String ENTRYPOINT = "/tmp/run.sh"; + private static final String TEST_IMAGE_NAME = "np_dest:dev"; + + @BeforeAll + public static void setup() { + // TODO(Davin): Why does building the container ahead doesn't work? 
+ // new GenericContainer( + // new ImageFromDockerfile(TEST_IMAGE_NAME, false) + // .withDockerfileFromBuilder(builder -> { + // builder + // .from("debian") + // .env(Map.of("AIRBYTE_ENTRYPOINT", ENTRYPOINT)) + // .entryPoint(ENTRYPOINT) + // .build();})).withEnv("AIRBYTE_ENTRYPOINT", ENTRYPOINT); + } + + @Test + @DisplayName("Should error if image does not have the right env var set.") + public void testGetCommandFromImageNoCommand() { + assertThrows(RuntimeException.class, () -> KubePodProcess.getCommandFromImage(CLIENT, "debian")); + } + + @Test + @DisplayName("Should error if image does not exists.") + public void testGetCommandFromImageMissingImage() { + assertThrows(RuntimeException.class, () -> KubePodProcess.getCommandFromImage(CLIENT, "bad_missing_image")); + } + + @Test + @DisplayName("Should retrieve the right command if image has the right env var set.") + public void testGetCommandFromImageCommandPresent() throws IOException, InterruptedException { + var command = KubePodProcess.getCommandFromImage(CLIENT, TEST_IMAGE_NAME); + assertEquals(ENTRYPOINT, command); + } + +} diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOCTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOCTest.java deleted file mode 100644 index 071f74c317999..0000000000000 --- a/airbyte-workers/src/test/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOCTest.java +++ /dev/null @@ -1,50 +0,0 @@ -package io.airbyte.workers.process; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; - -import java.io.IOException; -import java.util.Map; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.DisplayName; -import org.junit.jupiter.api.Test; -import org.testcontainers.containers.GenericContainer; -import org.testcontainers.images.builder.ImageFromDockerfile; - -public class KubeProcessBuilderFactoryPOCTest { - private static final String ENTRYPOINT = "/tmp/run.sh"; - private static final String TEST_IMAGE_NAME = "np_dest:dev"; - - @BeforeAll - public static void setup() { - // TODO(Davin): Why does building the container ahead doesn't work? -// new GenericContainer( -// new ImageFromDockerfile(TEST_IMAGE_NAME, false) -// .withDockerfileFromBuilder(builder -> { -// builder -// .from("debian") -// .env(Map.of("AIRBYTE_ENTRYPOINT", ENTRYPOINT)) -// .entryPoint(ENTRYPOINT) -// .build();})).withEnv("AIRBYTE_ENTRYPOINT", ENTRYPOINT); - } - - @Test - @DisplayName("Should error if image does not have the right env var set.") - public void testGetCommandFromImageNoCommand() { - assertThrows(RuntimeException.class, () -> KubeProcessBuilderFactoryPOC.getCommandFromImage("hello-world")); - } - - @Test - @DisplayName("Should error if image does not exists.") - public void testGetCommandFromImageBadImage() { - assertThrows(RuntimeException.class, () -> KubeProcessBuilderFactoryPOC.getCommandFromImage("bad_missing_image")); - } - - @Test - @DisplayName("Should retrieve the right command if image has the right env var set.") - public void testGetCommandFromImageCommandPresent() throws IOException { - var command = KubeProcessBuilderFactoryPOC.getCommandFromImage(TEST_IMAGE_NAME); - assertEquals(ENTRYPOINT, command); - } - -} From cfed967e904206c4d8fc8d121c6a20b3f94817f1 Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Tue, 25 May 2021 18:00:56 +0800 Subject: [PATCH 28/34] Remove kubectl install dependency. 
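(Sketch, not part of the patch: with the kubectl shell-out gone, the fabric8 client resolves its own configuration, using the in-cluster service account when running inside Kubernetes or ~/.kube/config when run locally, which is why the worker image no longer needs a bundled kubectl binary. Config.autoConfigure is standard fabric8 API; the class name here is illustrative.)

import io.fabric8.kubernetes.client.Config;
import io.fabric8.kubernetes.client.DefaultKubernetesClient;
import io.fabric8.kubernetes.client.KubernetesClient;

final class ClientFromEnvironment {

  static KubernetesClient create() {
    // Picks up kube configuration from the environment; passing null means "use the current context".
    Config config = Config.autoConfigure(null);
    return new DefaultKubernetesClient(config);
  }

}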
--- airbyte-workers/Dockerfile | 3 --- 1 file changed, 3 deletions(-) diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index 07450a191d7fe..e435ea69234e9 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -2,9 +2,6 @@ FROM openjdk:14.0.2-slim WORKDIR /airbyte -# Just so we can run kubectl for now. -COPY --from=lachlanevenson/k8s-kubectl:v1.10.3 /usr/local/bin/kubectl /usr/local/bin/kubectl - COPY build/distributions/airbyte-workers*.tar airbyte-workers.tar RUN tar xf airbyte-workers.tar --strip-components=1 From 582df9d0332b42a0604cb1538126277f8b50ca81 Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Tue, 25 May 2021 18:23:32 +0800 Subject: [PATCH 29/34] Clean up getPodId and add tests. --- .../workers/process/KubePodProcess.java | 11 ++- .../process/KubeProcessBuilderFactoryPOC.java | 26 ------ .../workers/process/KubePodProcessTest.java | 82 +++++++++++++++---- 3 files changed, 77 insertions(+), 42 deletions(-) diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java index cbd358c0067e9..6939e4df495a2 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java @@ -34,7 +34,6 @@ import io.fabric8.kubernetes.api.model.VolumeBuilder; import io.fabric8.kubernetes.api.model.VolumeMount; import io.fabric8.kubernetes.api.model.VolumeMountBuilder; -import io.fabric8.kubernetes.client.DefaultKubernetesClient; import io.fabric8.kubernetes.client.KubernetesClient; import java.io.IOException; import java.io.InputStream; @@ -106,6 +105,14 @@ public static String getCommandFromImage(KubernetesClient client, String imageNa return envVal; } + public static String getPodIP(KubernetesClient client, String podName) { + var pod = client.pods().inNamespace("default").withName(podName).get(); + if (pod == null) { + throw new RuntimeException("Error: unable to find pod!"); + } + return pod.getStatus().getPodIP(); + } + public KubePodProcess(KubernetesClient client, String podName, String image, int stdoutLocalPort, boolean usesStdin) throws IOException, InterruptedException { this.client = client; @@ -192,7 +199,7 @@ public KubePodProcess(KubernetesClient client, String podName, String image, int // allow writing stdin to pod LOGGER.info("Reading pod IP..."); - var podIp = KubeProcessBuilderFactoryPOC.getPodIP(podName); + var podIp = getPodIP(client, podName); LOGGER.info("Pod IP: {}", podIp); if (usesStdin) { diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java index 5ef1fb9b88dd1..82e64b6a993fe 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java @@ -24,10 +24,7 @@ package io.airbyte.workers.process; -import com.google.common.collect.Lists; import io.airbyte.commons.io.IOs; -import io.fabric8.kubernetes.api.model.Pod; -import io.fabric8.kubernetes.api.model.PodList; import io.fabric8.kubernetes.client.DefaultKubernetesClient; import io.fabric8.kubernetes.client.KubernetesClient; import java.io.BufferedReader; @@ -35,10 +32,8 @@ import java.io.InputStreamReader; import java.io.PrintWriter; import java.nio.file.Path; -import java.util.List; import 
java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import org.apache.commons.lang3.RandomStringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -71,27 +66,6 @@ private static void saveJaredWork() { } } - public static String getPodIP(String podName) { - // TODO: Why does directly searching for the pod not work? - // LOGGER.info(destPod.getStatus().getPodIP()); - // destPod = client.resource(destPod).get(); - // LOGGER.info("Status: {}", destPod.getStatus()); - // LOGGER.info("IP: {}", destPod.getStatus().getPodIP()); - // IP = destPod.getStatus().getPodIP(); - - // TODO: We could assign labels to pods to narrow the search. - PodList pods = KUBE_CLIENT.pods().inNamespace("default").list(); - for (Pod p : pods.getItems()) { - // Filter by pod and retrieve IP. - if (p.getMetadata().getName().equals(podName)) { - LOGGER.info("Found IP!"); - return p.getStatus().getPodIP(); - } - } - - return null; - } - public static void main(String[] args) throws InterruptedException, IOException { LOGGER.info("Launching source process..."); Process src = new KubePodProcess(KUBE_CLIENT, "src", "np_source:dev", 9002, false); diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/process/KubePodProcessTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/process/KubePodProcessTest.java index c4c97991f5ee5..26c7dfc7383f2 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/process/KubePodProcessTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/process/KubePodProcessTest.java @@ -27,11 +27,17 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import io.fabric8.kubernetes.api.model.ContainerBuilder; +import io.fabric8.kubernetes.api.model.Pod; +import io.fabric8.kubernetes.api.model.PodBuilder; import io.fabric8.kubernetes.client.DefaultKubernetesClient; import io.fabric8.kubernetes.client.KubernetesClient; import java.io.IOException; +import java.util.concurrent.TimeUnit; +import org.apache.commons.lang3.RandomStringUtils; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; public class KubePodProcessTest { @@ -53,23 +59,71 @@ public static void setup() { // .build();})).withEnv("AIRBYTE_ENTRYPOINT", ENTRYPOINT); } - @Test - @DisplayName("Should error if image does not have the right env var set.") - public void testGetCommandFromImageNoCommand() { - assertThrows(RuntimeException.class, () -> KubePodProcess.getCommandFromImage(CLIENT, "debian")); - } + @Nested + class GetCommand { + + @Test + @DisplayName("Should error if image does not have the right env var set.") + public void testGetCommandFromImageNoCommand() { + assertThrows(RuntimeException.class, () -> KubePodProcess.getCommandFromImage(CLIENT, "debian")); + } + + @Test + @DisplayName("Should error if image does not exists.") + public void testGetCommandFromImageMissingImage() { + assertThrows(RuntimeException.class, () -> KubePodProcess.getCommandFromImage(CLIENT, "bad_missing_image")); + } + + @Test + @DisplayName("Should retrieve the right command if image has the right env var set.") + public void testGetCommandFromImageCommandPresent() throws IOException, InterruptedException { + var command = KubePodProcess.getCommandFromImage(CLIENT, TEST_IMAGE_NAME); + assertEquals(ENTRYPOINT, command); + } - @Test - @DisplayName("Should error if image does not exists.") - public void 
testGetCommandFromImageMissingImage() { - assertThrows(RuntimeException.class, () -> KubePodProcess.getCommandFromImage(CLIENT, "bad_missing_image")); } - @Test - @DisplayName("Should retrieve the right command if image has the right env var set.") - public void testGetCommandFromImageCommandPresent() throws IOException, InterruptedException { - var command = KubePodProcess.getCommandFromImage(CLIENT, TEST_IMAGE_NAME); - assertEquals(ENTRYPOINT, command); + @Nested + class GetPodIp { + + @Test + @DisplayName("Should error when the given pod does not exists.") + public void testGetPodIpNoPod() { + assertThrows(RuntimeException.class, () -> KubePodProcess.getPodIP(CLIENT, "pod-does-not-exist")); + } + + @Test + @DisplayName("Should return the correct pod ip.") + public void testGetPodIpGoodPod() throws InterruptedException { + final String suffix = RandomStringUtils.randomAlphabetic(5).toLowerCase(); + var sleep = new ContainerBuilder() + .withImage("busybox") + .withName("sleep") + .withCommand("sleep", "100000") + .build(); + + var podName = "test-get-pod-good-pod-" + suffix; + Pod podDef = new PodBuilder() + .withApiVersion("v1") + .withNewMetadata() + .withName(podName) + .endMetadata() + .withNewSpec() + .withRestartPolicy("Never") + .withRestartPolicy("Never") + .withContainers(sleep) + .endSpec() + .build(); + + Pod pod = CLIENT.pods().inNamespace("default").createOrReplace(podDef); + CLIENT.resource(pod).waitUntilReady(20, TimeUnit.SECONDS); + + var ip = KubePodProcess.getPodIP(CLIENT, podName); + var exp = CLIENT.pods().inNamespace("default").withName(podName).get().getStatus().getPodIP(); + assertEquals(exp, ip); + CLIENT.resource(podDef).inNamespace("default").delete(); + } + } } From 9aae5da8f43288dcc4a1c73cbd340037cc06bddc Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Tue, 25 May 2021 18:44:39 +0800 Subject: [PATCH 30/34] Make sure all client.pod calls include namespace. --- .../main/java/io/airbyte/workers/process/KubePodProcess.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java index 6939e4df495a2..bc78859723690 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java @@ -246,7 +246,7 @@ private boolean isTerminal(Pod pod) { } private int getReturnCode(Pod pod) { - Pod refreshedPod = client.pods().withName(pod.getMetadata().getName()).get(); // todo: use more robust version here + Pod refreshedPod = client.pods().inNamespace("default").withName(pod.getMetadata().getName()).get(); Preconditions.checkArgument(isTerminal(refreshedPod)); return refreshedPod.getStatus().getContainerStatuses() From 2072dd066febcdd3cc437bdc3cf3e4ab637fd35e Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Tue, 25 May 2021 21:19:24 +0800 Subject: [PATCH 31/34] Add comments to explain what is happening. 
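(Sketch under stated assumptions, not part of the patch: the clean-up added to waitFor() below relies on shutdownNow() alone. When diagnosing why the JVM lingers after "Done!" is logged, the problem the later patches chase, a stricter variant that also waits for termination can make the culprit visible. The helper name is illustrative.)

import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;

final class ExecutorShutdowns {

  static void shutdownOrWarn(ExecutorService executor) throws InterruptedException {
    executor.shutdownNow();
    // If worker threads ignore interruption, the pool's non-daemon threads keep the JVM alive.
    if (!executor.awaitTermination(30, TimeUnit.SECONDS)) {
      System.err.println("Executor did not terminate; a non-daemon thread is still running.");
    }
  }

}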
--- airbyte-workers/build.gradle | 9 ++ .../workers/process/KubePodProcess.java | 10 ++ .../process/KubeProcessBuilderFactoryPOC.java | 91 ++++++++++--------- 3 files changed, 65 insertions(+), 45 deletions(-) diff --git a/airbyte-workers/build.gradle b/airbyte-workers/build.gradle index ac6e009a3d8b0..4911479ed5213 100644 --- a/airbyte-workers/build.gradle +++ b/airbyte-workers/build.gradle @@ -8,6 +8,15 @@ plugins { application { mainClass = 'io.airbyte.workers.process.KubeProcessBuilderFactoryPOC' + applicationDefaultJvmArgs = ['-Xmx500m', + '-XX:NativeMemoryTracking=detail', + "-Djava.rmi.server.hostname=localhost", + '-Dcom.sun.management.jmxremote=true', + '-Dcom.sun.management.jmxremote.port=6000', + "-Dcom.sun.management.jmxremote.rmi.port=6000", + '-Dcom.sun.management.jmxremote.local.only=false', + '-Dcom.sun.management.jmxremote.authenticate=false', + '-Dcom.sun.management.jmxremote.ssl=false'] } configurations { diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java index bc78859723690..4e6e3e185ef8f 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java @@ -230,7 +230,17 @@ public InputStream getErrorStream() { @Override public int waitFor() throws InterruptedException { + // These are closed in the opposite order in which they are created to prevent any resource conflicts. client.resource(podDefinition).waitUntilCondition(this::isTerminal, 10, TimeUnit.DAYS); + try { + this.stdin.close(); + this.stdoutServerSocket.close(); + this.stdout.close(); + this.executorService.shutdownNow(); + } catch (IOException e) { + e.printStackTrace(); + } + return exitValue(); } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java index 82e64b6a993fe..dfc7eb8befd68 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java @@ -25,8 +25,11 @@ package io.airbyte.workers.process; import io.airbyte.commons.io.IOs; +import io.fabric8.kubernetes.client.Config; +import io.fabric8.kubernetes.client.ConfigBuilder; import io.fabric8.kubernetes.client.DefaultKubernetesClient; import io.fabric8.kubernetes.client.KubernetesClient; +import io.fabric8.kubernetes.client.utils.HttpClientUtils; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; @@ -34,37 +37,19 @@ import java.nio.file.Path; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import okhttp3.OkHttpClient; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class KubeProcessBuilderFactoryPOC { private static final Logger LOGGER = LoggerFactory.getLogger(KubeProcessBuilderFactoryPOC.class); - - private static final KubernetesClient KUBE_CLIENT = new DefaultKubernetesClient(); - - private static void saveJaredWork() { - try { - // todo: test this with args that are used by the process - Process process = new KubeProcessBuilderFactory(Path.of("stdout_template.yaml")) - .create(0L, 0, Path.of("/tmp"), "np_source:dev", null) - .start(); - - process.getOutputStream().write(100); - process.getInputStream().read(); - - // 
after running this main: - // kubectl port-forward airbyte-worker-0-0-fmave 9000:9000 - // socat -d -d -d TCP-LISTEN:9000,bind=127.0.0.1 stdout - - LOGGER.info("waiting..."); - int code = process.waitFor(); - LOGGER.info("code = " + code); - } catch (Exception e) { - LOGGER.error(e.getMessage()); - e.printStackTrace(); - } - } + // Explicitly create the underlying HTTP client since the Kube client has issues with closing the client. It is not clear in which library the fault + // lies. See https://github.com/fabric8io/kubernetes-client/issues/2403. + private static final Config CONFIG = new ConfigBuilder().build(); + private static final OkHttpClient OK_HTTP_CLIENT = HttpClientUtils.createHttpClient(CONFIG); + private static final KubernetesClient KUBE_CLIENT = new DefaultKubernetesClient(OK_HTTP_CLIENT, CONFIG); public static void main(String[] args) throws InterruptedException, IOException { LOGGER.info("Launching source process..."); @@ -75,39 +60,55 @@ public static void main(String[] args) throws InterruptedException, IOException LOGGER.info("Launching background thread to read destination lines..."); ExecutorService executor = Executors.newSingleThreadExecutor(); - executor.submit(() -> { + var listenTask = executor.submit(() -> { BufferedReader reader = new BufferedReader(new InputStreamReader(dest.getInputStream())); - - while (true) { - try { - String line; - if ((line = reader.readLine()) != null) { - LOGGER.info("Destination sent: {}", line); - } - } catch (IOException e) { - e.printStackTrace(); + try { + String line; + while ((line = reader.readLine()) != null) { + LOGGER.info("Destination sent: {}", line); } + } catch (IOException e) { + e.printStackTrace(); } }); LOGGER.info("Copying source stdout to destination stdin..."); - BufferedReader reader = IOs.newBufferedReader(src.getInputStream()); - PrintWriter writer = new PrintWriter(dest.getOutputStream(), true); - - String line; - while ((line = reader.readLine()) != null) { - writer.println(line); + try (BufferedReader reader = IOs.newBufferedReader(src.getInputStream())) { + try (PrintWriter writer = new PrintWriter(dest.getOutputStream(), true)) { + String line; + while ((line = reader.readLine()) != null) { + writer.println(line); + } + } } - writer.close(); - LOGGER.info("Waiting for source..."); + LOGGER.info("Waiting for source process to terminate..."); src.waitFor(); - LOGGER.info("Waiting for destination..."); + LOGGER.info("Waiting for destination process to terminate..."); dest.waitFor(); + + LOGGER.info("Closing sync worker resources..."); + listenTask.cancel(true); + executor.shutdownNow(); + // TODO(Davin): Figure out why these commands are not effectively shutting down OkHTTP even though documentation suggests so. See https://square.github.io/okhttp/4.x/okhttp/okhttp3/-ok-http-client/#shutdown-isnt-necessary + // Instead, the pod shuts down after 5 minutes as the pool reaps the remaining idle connection after 5 minutes of inactivity, as per the default configuration. + OK_HTTP_CLIENT.dispatcher().executorService().shutdownNow(); + OK_HTTP_CLIENT.connectionPool().evictAll(); + KUBE_CLIENT.close(); + LOGGER.info("Done!"); - System.exit(0); // todo: handle executors so we don't need to kill the JVM + // Debug Statements, to remove both merging. 
+ LOGGER.info("src: {}", src.isAlive()); + LOGGER.info("dest: {}", dest.isAlive()); + LOGGER.info("executor terminated: {}", executor.isTerminated()); + LOGGER.info("executor shutdown: {}", executor.isShutdown()); + LOGGER.info("executor terminated: {}", executor.isTerminated()); + LOGGER.info("ok http client pool size: {}", OK_HTTP_CLIENT.connectionPool().connectionCount()); + LOGGER.info("ok http client pool idle size: {}", OK_HTTP_CLIENT.connectionPool().idleConnectionCount()); + LOGGER.info("ok http executor service shutdown: {}", OK_HTTP_CLIENT.dispatcher().executorService().isShutdown()); + } } From a24afdb2ee9c004e75c9cb30067feab9a89b5f2f Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Tue, 25 May 2021 21:22:21 +0800 Subject: [PATCH 32/34] Format. --- .../io/airbyte/workers/process/KubePodProcess.java | 3 ++- .../process/KubeProcessBuilderFactoryPOC.java | 12 +++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java index 4e6e3e185ef8f..185c61d8242ac 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java @@ -230,7 +230,8 @@ public InputStream getErrorStream() { @Override public int waitFor() throws InterruptedException { - // These are closed in the opposite order in which they are created to prevent any resource conflicts. + // These are closed in the opposite order in which they are created to prevent any resource + // conflicts. client.resource(podDefinition).waitUntilCondition(this::isTerminal, 10, TimeUnit.DAYS); try { this.stdin.close(); diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java index dfc7eb8befd68..8a1155ab7cc4f 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java @@ -34,10 +34,8 @@ import java.io.IOException; import java.io.InputStreamReader; import java.io.PrintWriter; -import java.nio.file.Path; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; import okhttp3.OkHttpClient; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -45,7 +43,8 @@ public class KubeProcessBuilderFactoryPOC { private static final Logger LOGGER = LoggerFactory.getLogger(KubeProcessBuilderFactoryPOC.class); - // Explicitly create the underlying HTTP client since the Kube client has issues with closing the client. It is not clear in which library the fault + // Explicitly create the underlying HTTP client since the Kube client has issues with closing the + // client. It is not clear in which library the fault // lies. See https://github.com/fabric8io/kubernetes-client/issues/2403. 
private static final Config CONFIG = new ConfigBuilder().build(); private static final OkHttpClient OK_HTTP_CLIENT = HttpClientUtils.createHttpClient(CONFIG); @@ -91,8 +90,11 @@ public static void main(String[] args) throws InterruptedException, IOException LOGGER.info("Closing sync worker resources..."); listenTask.cancel(true); executor.shutdownNow(); - // TODO(Davin): Figure out why these commands are not effectively shutting down OkHTTP even though documentation suggests so. See https://square.github.io/okhttp/4.x/okhttp/okhttp3/-ok-http-client/#shutdown-isnt-necessary - // Instead, the pod shuts down after 5 minutes as the pool reaps the remaining idle connection after 5 minutes of inactivity, as per the default configuration. + // TODO(Davin): Figure out why these commands are not effectively shutting down OkHTTP even though + // documentation suggests so. See + // https://square.github.io/okhttp/4.x/okhttp/okhttp3/-ok-http-client/#shutdown-isnt-necessary + // Instead, the pod shuts down after 5 minutes as the pool reaps the remaining idle connection after + // 5 minutes of inactivity, as per the default configuration. OK_HTTP_CLIENT.dispatcher().executorService().shutdownNow(); OK_HTTP_CLIENT.connectionPool().evictAll(); KUBE_CLIENT.close(); From 193f9942fa3e12d8f1dec4b4b3c45fb1a23887b1 Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Tue, 25 May 2021 21:27:41 +0800 Subject: [PATCH 33/34] Remove unused test dockerfile. --- airbyte-workers/src/test/resources/dockerfile-with-env-var | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 airbyte-workers/src/test/resources/dockerfile-with-env-var diff --git a/airbyte-workers/src/test/resources/dockerfile-with-env-var b/airbyte-workers/src/test/resources/dockerfile-with-env-var deleted file mode 100644 index 56a00b3a4adcb..0000000000000 --- a/airbyte-workers/src/test/resources/dockerfile-with-env-var +++ /dev/null @@ -1,3 +0,0 @@ -FROM hello-world - -ENV AIRBYTE_ENTRYPOINT="/tmp/run.sh" From f0a53c0a060321cddfca5d9854d2d14146de42ff Mon Sep 17 00:00:00 2001 From: jrhizor Date: Tue, 25 May 2021 18:55:06 -0700 Subject: [PATCH 34/34] prevent kube pod process from hanging --- .../process/HttpClientUtilsAirbyte.java | 300 ++++++++++++++++++ .../process/KubeProcessBuilderFactoryPOC.java | 19 +- 2 files changed, 312 insertions(+), 7 deletions(-) create mode 100644 airbyte-workers/src/main/java/io/airbyte/workers/process/HttpClientUtilsAirbyte.java diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/HttpClientUtilsAirbyte.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/HttpClientUtilsAirbyte.java new file mode 100644 index 0000000000000..a3e3af537fa1b --- /dev/null +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/HttpClientUtilsAirbyte.java @@ -0,0 +1,300 @@ +package io.airbyte.workers.process; + +import io.fabric8.kubernetes.api.model.ListOptions; +import io.fabric8.kubernetes.client.Config; +import io.fabric8.kubernetes.client.KubernetesClientException; +import io.fabric8.kubernetes.client.internal.SSLUtils; +import io.fabric8.kubernetes.client.utils.BackwardsCompatibilityInterceptor; +import io.fabric8.kubernetes.client.utils.ImpersonatorInterceptor; +import io.fabric8.kubernetes.client.utils.IpAddressMatcher; +import io.fabric8.kubernetes.client.utils.TokenRefreshInterceptor; +import io.fabric8.kubernetes.client.utils.Utils; +import okhttp3.ConnectionPool; +import okhttp3.ConnectionSpec; +import okhttp3.Credentials; +import okhttp3.Dispatcher; +import okhttp3.HttpUrl; +import 
okhttp3.Interceptor; +import okhttp3.OkHttpClient; +import okhttp3.Protocol; +import okhttp3.Request; +import okhttp3.logging.HttpLoggingInterceptor; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.net.ssl.KeyManager; +import javax.net.ssl.SSLContext; +import javax.net.ssl.TrustManager; +import javax.net.ssl.X509TrustManager; +import java.net.InetSocketAddress; +import java.net.MalformedURLException; +import java.net.Proxy; +import java.net.URL; +import java.security.GeneralSecurityException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +import static okhttp3.ConnectionSpec.CLEARTEXT; + +public class HttpClientUtilsAirbyte { + private HttpClientUtilsAirbyte() { } + + private static Pattern VALID_IPV4_PATTERN = null; + public static final String ipv4Pattern = "(http:\\/\\/|https:\\/\\/)?(([01]?\\d\\d?|2[0-4]\\d|25[0-5])\\.){3}([01]?\\d\\d?|2[0-4]\\d|25[0-5])(\\/[0-9]\\d|1[0-9]\\d|2[0-9]\\d|3[0-2]\\d)?"; + protected static final String KUBERNETES_BACKWARDS_COMPATIBILITY_INTERCEPTOR_DISABLE = "kubernetes.backwardsCompatibilityInterceptor.disable"; + + static { + try { + VALID_IPV4_PATTERN = Pattern.compile(ipv4Pattern, Pattern.CASE_INSENSITIVE); + } catch (PatternSyntaxException e) { + throw KubernetesClientException.launderThrowable("Unable to compile ipv4address pattern.", e); + } + } + + public static OkHttpClient createHttpClient(final Config config) { + return createHttpClient(config, (b) -> {}); + } + + public static OkHttpClient createHttpClientForMockServer(final Config config) { + return createHttpClient(config, b -> b.protocols(Collections.singletonList(Protocol.HTTP_1_1))); + } + + public static HttpUrl.Builder appendListOptionParams(HttpUrl.Builder urlBuilder, ListOptions listOptions) { + if (listOptions == null) { + return urlBuilder; + } + if (listOptions.getLimit() != null) { + urlBuilder.addQueryParameter("limit", listOptions.getLimit().toString()); + } + if (listOptions.getContinue() != null) { + urlBuilder.addQueryParameter("continue", listOptions.getContinue()); + } + + if (listOptions.getResourceVersion() != null) { + urlBuilder.addQueryParameter("resourceVersion", listOptions.getResourceVersion()); + } + + if (listOptions.getFieldSelector() != null) { + urlBuilder.addQueryParameter("fieldSelector", listOptions.getFieldSelector()); + } + + if (listOptions.getLabelSelector() != null) { + urlBuilder.addQueryParameter("labelSelector", listOptions.getLabelSelector()); + } + + if (listOptions.getTimeoutSeconds() != null) { + urlBuilder.addQueryParameter("timeoutSeconds", listOptions.getTimeoutSeconds().toString()); + } + + if (listOptions.getAllowWatchBookmarks() != null) { + urlBuilder.addQueryParameter("allowWatchBookmarks", listOptions.getAllowWatchBookmarks().toString()); + } + + if (listOptions.getWatch() != null) { + urlBuilder.addQueryParameter("watch", listOptions.getWatch().toString()); + } + return urlBuilder; + } + + private static OkHttpClient createHttpClient(final Config config, final Consumer additionalConfig) { + try { + OkHttpClient.Builder httpClientBuilder = new OkHttpClient.Builder(); + + httpClientBuilder.connectionPool(new ConnectionPool(0, 30, TimeUnit.SECONDS)); + + // Follow any redirects + 
httpClientBuilder.followRedirects(true); + httpClientBuilder.followSslRedirects(true); + + if (config.isTrustCerts() || config.isDisableHostnameVerification()) { + httpClientBuilder.hostnameVerifier((s, sslSession) -> true); + } + + TrustManager[] trustManagers = SSLUtils.trustManagers(config); + KeyManager[] keyManagers = SSLUtils.keyManagers(config); + + if (keyManagers != null || trustManagers != null || config.isTrustCerts()) { + X509TrustManager trustManager = null; + if (trustManagers != null && trustManagers.length == 1) { + trustManager = (X509TrustManager) trustManagers[0]; + } + + try { + SSLContext sslContext = SSLUtils.sslContext(keyManagers, trustManagers); + httpClientBuilder.sslSocketFactory(sslContext.getSocketFactory(), trustManager); + } catch (GeneralSecurityException e) { + throw new AssertionError(); // The system has no TLS. Just give up. + } + } else { + SSLContext context = SSLContext.getInstance("TLSv1.2"); + context.init(keyManagers, trustManagers, null); + httpClientBuilder.sslSocketFactory(context.getSocketFactory(), (X509TrustManager) trustManagers[0]); + } + + List interceptors = createApplicableInterceptors(config); + interceptors.forEach(httpClientBuilder::addInterceptor); + Logger reqLogger = LoggerFactory.getLogger(HttpLoggingInterceptor.class); + if (reqLogger.isTraceEnabled()) { + HttpLoggingInterceptor loggingInterceptor = new HttpLoggingInterceptor(); + loggingInterceptor.setLevel(HttpLoggingInterceptor.Level.BODY); + httpClientBuilder.addNetworkInterceptor(loggingInterceptor); + } + + if (config.getConnectionTimeout() > 0) { + httpClientBuilder.connectTimeout(config.getConnectionTimeout(), TimeUnit.MILLISECONDS); + } + + if (config.getRequestTimeout() > 0) { + httpClientBuilder.readTimeout(config.getRequestTimeout(), TimeUnit.MILLISECONDS); + } + + if (config.getWebsocketPingInterval() > 0) { + httpClientBuilder.pingInterval(config.getWebsocketPingInterval(), TimeUnit.MILLISECONDS); + } + + if (config.getMaxConcurrentRequests() > 0 && config.getMaxConcurrentRequestsPerHost() > 0) { + Dispatcher dispatcher = new Dispatcher(); + dispatcher.setMaxRequests(config.getMaxConcurrentRequests()); + dispatcher.setMaxRequestsPerHost(config.getMaxConcurrentRequestsPerHost()); + httpClientBuilder.dispatcher(dispatcher); + } + + // Only check proxy if it's a full URL with protocol + if (config.getMasterUrl().toLowerCase(Locale.ROOT).startsWith(Config.HTTP_PROTOCOL_PREFIX) || config.getMasterUrl().startsWith(Config.HTTPS_PROTOCOL_PREFIX)) { + try { + URL proxyUrl = getProxyUrl(config); + if (proxyUrl != null) { + httpClientBuilder.proxy(new Proxy(Proxy.Type.HTTP, new InetSocketAddress(proxyUrl.getHost(), proxyUrl.getPort()))); + + if (config.getProxyUsername() != null) { + httpClientBuilder.proxyAuthenticator((route, response) -> { + + String credential = Credentials.basic(config.getProxyUsername(), config.getProxyPassword()); + return response.request().newBuilder().header("Proxy-Authorization", credential).build(); + }); + } + } else { + httpClientBuilder.proxy(Proxy.NO_PROXY); + } + + } catch (MalformedURLException e) { + throw new KubernetesClientException("Invalid proxy server configuration", e); + } + } + + if (config.getUserAgent() != null && !config.getUserAgent().isEmpty()) { + httpClientBuilder.addNetworkInterceptor(chain -> { + Request agent = chain.request().newBuilder().header("User-Agent", config.getUserAgent()).build(); + return chain.proceed(agent); + }); + } + + if (config.getTlsVersions() != null && config.getTlsVersions().length > 0) { + 
ConnectionSpec spec = new ConnectionSpec.Builder(ConnectionSpec.MODERN_TLS) + .tlsVersions(config.getTlsVersions()) + .build(); + httpClientBuilder.connectionSpecs(Arrays.asList(spec, CLEARTEXT)); + } + + if (shouldDisableHttp2() || config.isHttp2Disable()) { + httpClientBuilder.protocols(Collections.singletonList(Protocol.HTTP_1_1)); + } + + if(additionalConfig != null) { + additionalConfig.accept(httpClientBuilder); + } + + if (config.getCustomHeaders() != null && !config.getCustomHeaders().isEmpty()) { + httpClientBuilder.addNetworkInterceptor(chain -> { + Request.Builder agent = chain.request().newBuilder(); + for (Map.Entry entry : config.getCustomHeaders().entrySet()) { + agent.addHeader(entry.getKey(),entry.getValue()); + } + return chain.proceed(agent.build()); + }); + } + + return httpClientBuilder.build(); + } catch (Exception e) { + throw KubernetesClientException.launderThrowable(e); + } + } + + private static URL getProxyUrl(Config config) throws MalformedURLException { + URL master = new URL(config.getMasterUrl()); + String host = master.getHost(); + if (config.getNoProxy() != null) { + for (String noProxy : config.getNoProxy()) { + if (isIpAddress(noProxy)) { + if (new IpAddressMatcher(noProxy).matches(host)) { + return null; + } + } else { + if (host.contains(noProxy)) { + return null; + } + } + } + } + String proxy = config.getHttpsProxy(); + if (master.getProtocol().equals("http")) { + proxy = config.getHttpProxy(); + } + if (proxy != null) { + return new URL(proxy); + } + return null; + } + + private static boolean isIpAddress(String ipAddress) { + Matcher ipMatcher = VALID_IPV4_PATTERN.matcher(ipAddress); + return ipMatcher.matches(); + } + + /** + * OkHttp wrongfully detects >JDK8u251 as which enables Http2 + * unsupported for JDK8. 
+ * + * @return true if JDK8 is detected, false otherwise- + * @see #2212 + */ + private static boolean shouldDisableHttp2() { + return System.getProperty("java.version", "").startsWith("1.8"); + } + + static List createApplicableInterceptors(Config config) { + List interceptors = new ArrayList<>(); + // Header Interceptor + interceptors.add(chain -> { + Request request = chain.request(); + if (Utils.isNotNullOrEmpty(config.getUsername()) && Utils.isNotNullOrEmpty(config.getPassword())) { + Request authReq = chain.request().newBuilder().addHeader("Authorization", Credentials.basic(config.getUsername(), config.getPassword())).build(); + return chain.proceed(authReq); + } else if (Utils.isNotNullOrEmpty(config.getOauthToken())) { + Request authReq = chain.request().newBuilder().addHeader("Authorization", "Bearer " + config.getOauthToken()).build(); + return chain.proceed(authReq); + } + return chain.proceed(request); + }); + // Impersonator Interceptor + interceptors.add(new ImpersonatorInterceptor(config)); + // Token Refresh Interceptor + interceptors.add(new TokenRefreshInterceptor(config)); + // Backwards Compatibility Interceptor + String shouldDisableBackwardsCompatibilityInterceptor = Utils.getSystemPropertyOrEnvVar(KUBERNETES_BACKWARDS_COMPATIBILITY_INTERCEPTOR_DISABLE, "false"); + if (!Boolean.parseBoolean(shouldDisableBackwardsCompatibilityInterceptor)) { + interceptors.add(new BackwardsCompatibilityInterceptor()); + } + + return interceptors; + } +} diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java index 8a1155ab7cc4f..39aed7292e950 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessBuilderFactoryPOC.java @@ -36,6 +36,9 @@ import java.io.PrintWriter; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; + +import okhttp3.ConnectionPool; import okhttp3.OkHttpClient; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -43,14 +46,15 @@ public class KubeProcessBuilderFactoryPOC { private static final Logger LOGGER = LoggerFactory.getLogger(KubeProcessBuilderFactoryPOC.class); - // Explicitly create the underlying HTTP client since the Kube client has issues with closing the - // client. It is not clear in which library the fault - // lies. See https://github.com/fabric8io/kubernetes-client/issues/2403. - private static final Config CONFIG = new ConfigBuilder().build(); - private static final OkHttpClient OK_HTTP_CLIENT = HttpClientUtils.createHttpClient(CONFIG); - private static final KubernetesClient KUBE_CLIENT = new DefaultKubernetesClient(OK_HTTP_CLIENT, CONFIG); public static void main(String[] args) throws InterruptedException, IOException { + // Explicitly create the underlying HTTP client since the Kube client has issues with closing the + // client. It is not clear in which library the fault + // lies. See https://github.com/fabric8io/kubernetes-client/issues/2403. 
+ final Config CONFIG = new ConfigBuilder().build(); + final OkHttpClient OK_HTTP_CLIENT = HttpClientUtilsAirbyte.createHttpClient(CONFIG); + final KubernetesClient KUBE_CLIENT = new DefaultKubernetesClient(OK_HTTP_CLIENT, CONFIG); + LOGGER.info("Launching source process..."); Process src = new KubePodProcess(KUBE_CLIENT, "src", "np_source:dev", 9002, false); @@ -95,9 +99,10 @@ public static void main(String[] args) throws InterruptedException, IOException // https://square.github.io/okhttp/4.x/okhttp/okhttp3/-ok-http-client/#shutdown-isnt-necessary // Instead, the pod shuts down after 5 minutes as the pool reaps the remaining idle connection after // 5 minutes of inactivity, as per the default configuration. + KUBE_CLIENT.close(); + OK_HTTP_CLIENT.dispatcher().cancelAll(); OK_HTTP_CLIENT.dispatcher().executorService().shutdownNow(); OK_HTTP_CLIENT.connectionPool().evictAll(); - KUBE_CLIENT.close(); LOGGER.info("Done!");