Skip to content

Commit

Permalink
connectors-ci: make source-file testable in airbyte-ci (#27107)
Browse files Browse the repository at this point in the history
  • Loading branch information
alafanechere authored Jun 8, 2023
1 parent a3c75e7 commit c2b7043
Show file tree
Hide file tree
Showing 17 changed files with 134 additions and 59 deletions.
13 changes: 11 additions & 2 deletions airbyte-integrations/connectors/source-file-secure/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,13 +1,22 @@
FROM airbyte/source-file:0.3.9
### WARNING ###
# This Dockerfile will soon be deprecated.
# It is not used to build the connector image we publish to DockerHub.
# The new logic to build the connector image is declared with Dagger here:
# https://github.com/airbytehq/airbyte/blob/master/tools/ci_connector_ops/ci_connector_ops/pipelines/actions/environments.py#L771

# If you need to add a custom logic to build your connector image, you can do it by adding a finalize_build.sh or finalize_build.py script in the connector folder.
# Please reach out to the Connectors Operations team if you have any questions.
FROM airbyte/source-file:0.3.10

WORKDIR /airbyte/integration_code
COPY source_file_secure ./source_file_secure
COPY main.py ./
COPY setup.py ./
ENV DOCKER_BUILD=True
RUN pip install .

ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.3.9
LABEL io.airbyte.version=0.3.10
LABEL io.airbyte.name=airbyte/source-file-secure
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ data:
connectorSubtype: file
connectorType: source
definitionId: 778daa7c-feaf-4db6-96f3-70fd645acc77
dockerImageTag: 0.3.9
dockerImageTag: 0.3.10
dockerRepository: airbyte/source-file-secure
githubIssueLabel: source-file
icon: file.svg
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
-e ../../bases/connector-acceptance-test
-e ../source-file
-e .
14 changes: 14 additions & 0 deletions airbyte-integrations/connectors/source-file-secure/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,20 @@
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#

import os
from pathlib import Path

from setuptools import find_packages, setup


def local_dependency(name: str) -> str:
    """Return a PEP 508 requirement string for a sibling local package.

    Inside a Dagger build (the DAGGER_BUILD environment variable is set) the
    package is expected to live under /local_dependencies in the build
    container; otherwise it is resolved as a sibling directory of the current
    working directory.

    Args:
        name (str): Directory/package name of the local dependency.

    Returns:
        str: A ``<name> @ file://...`` direct-reference requirement string.
    """
    if os.environ.get("DAGGER_BUILD"):
        return f"{name} @ file:///local_dependencies/{name}"
    sibling_path = Path.cwd().parent / name
    return f"{name} @ file://{sibling_path}"


MAIN_REQUIREMENTS = [
"airbyte-cdk~=0.1",
"gcsfs==2022.7.1",
Expand All @@ -23,6 +34,9 @@
"pyxlsb==1.0.9",
]

if not os.environ.get("DOCKER_BUILD"):
MAIN_REQUIREMENTS.append(local_dependency("source-file"))

TEST_REQUIREMENTS = ["boto3==1.21.21", "pytest==7.1.2", "pytest-docker==1.0.0", "pytest-mock~=3.8.2"]

setup(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,34 +4,19 @@

import json
import os
import sys

import source_file

# Some integration tests don't set up dependencies from the
# requirements.txt file, so Python can raise an exception.
# Thus we have to import this parent module manually.
from airbyte_cdk import AirbyteLogger
from airbyte_cdk.models import ConnectorSpecification

try:
import source_file.source
except ModuleNotFoundError:
current_dir = os.path.dirname(os.path.abspath(__file__))
parent_source_local = os.path.join(current_dir, "../../source-file")
if os.path.isdir(parent_source_local):
sys.path.append(parent_source_local)
else:
raise RuntimeError("not found parent source folder")
import source_file.source

# import original classes of the native Source File
from source_file import SourceFile as ParentSourceFile
from source_file.client import Client
from source_file.client import URLFile as ParentURLFile

LOCAL_STORAGE_NAME = "local"


class URLFileSecure(ParentURLFile):
class URLFileSecure(source_file.client.URLFile):
"""Updating of default logic:
This connector shouldn't work with local files.
"""
Expand All @@ -43,7 +28,7 @@ def __init__(self, url: str, provider: dict, binary=None, encoding=None):
super().__init__(url, provider, binary, encoding)


class SourceFileSecure(ParentSourceFile):
class SourceFileSecure(source_file.SourceFile):
"""Updating of default source logic
This connector shouldn't work with local files.
The base logic of this connector are implemented in the "source-file" connector.
Expand All @@ -52,7 +37,7 @@ class SourceFileSecure(ParentSourceFile):
@property
def client_class(self):
# replace a standard class variable to the new one
class ClientSecure(Client):
class ClientSecure(source_file.client.Client):
reader_class = URLFileSecure

return ClientSecure
Expand Down
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-file/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,5 @@ COPY source_file ./source_file
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.3.9
LABEL io.airbyte.version=0.3.10
LABEL io.airbyte.name=airbyte/source-file
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import json
import os
import random
import shutil
import socket
import string
import tempfile
Expand Down Expand Up @@ -81,7 +82,17 @@ def is_ssh_ready(ip, port):


@pytest.fixture(scope="session")
def ssh_service(docker_ip, docker_services):
def move_sample_files_to_tmp():
    """Copy sample files to /tmp so that they can be accessed by the dockerd service in the context of Dagger test runs.

    The sample files are mounted to the SSH service from the container under
    test following the instructions of docker-compose.yml in this directory.

    Yields:
        bool: True once the files are in place; the copy is removed on teardown.
    """
    tmp_sample_files = "/tmp/s3_sample_files"
    sample_files = Path(HERE / "sample_files")
    shutil.copytree(sample_files, tmp_sample_files)
    try:
        yield True
    finally:
        # Guard cleanup with try/finally so the /tmp copy is removed even if
        # an exception propagates through the generator (e.g. fixture errors).
        shutil.rmtree(tmp_sample_files)


@pytest.fixture(scope="session")
def ssh_service(move_sample_files_to_tmp, docker_ip, docker_services):
"""Ensure that SSH service is up and responsive."""
# `port_for` takes a container port and returns the corresponding host port
port = docker_services.port_for("ssh", 22)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
version: '3'
version: "3"
services:
ssh:
image: atmoz/sftp
ports:
- "2222:22"
volumes:
- ./sample_files:/home/user1/files
- /tmp/s3_sample_files:/home/user1/files
command: user1:abc123@456#:1001
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-file/metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ data:
connectorSubtype: file
connectorType: source
definitionId: 778daa7c-feaf-4db6-96f3-70fd645acc77
dockerImageTag: 0.3.9
dockerImageTag: 0.3.10
dockerRepository: airbyte/source-file
githubIssueLabel: source-file
icon: file.svg
Expand Down
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-file/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
"pyxlsb==1.0.9",
]

TEST_REQUIREMENTS = ["pytest~=6.2", "pytest-docker~=1.0.0", "pytest-mock~=3.6.1"]
TEST_REQUIREMENTS = ["pytest~=6.2", "pytest-docker~=1.0.0", "pytest-mock~=3.6.1", "docker-compose"]

setup(
name="source_file",
Expand Down
1 change: 1 addition & 0 deletions docs/integrations/sources/file.md
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ In order to read large files from a remote location, this connector uses the [sm

| Version | Date | Pull Request | Subject |
|:----------|:-------------|:-----------------------------------------------------------|:----------------------------------------------------------------------------------------------------------|
| 0.3.10 | 2023-06-07 | [27107](https://github.com/airbytehq/airbyte/pull/27107) | Make source-file testable in our new airbyte-ci pipelines |
| 0.3.9 | 2023-05-18 | [26275](https://github.com/airbytehq/airbyte/pull/26275) | add ParserError handling |
| 0.3.8 | 2023-05-17 | [26210](https://github.com/airbytehq/airbyte/pull/26210) | Bugfix for https://github.com/airbytehq/airbyte/pull/26115 |
| 0.3.7 | 2023-05-16 | [26131](https://github.com/airbytehq/airbyte/pull/26131) | Re-release source-file to be in sync with source-file-secure |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
from __future__ import annotations

import importlib.util
import re
import uuid
from typing import TYPE_CHECKING, List, Optional
from pathlib import Path
from typing import TYPE_CHECKING, List, Optional, Tuple

from ci_connector_ops.pipelines import consts
from ci_connector_ops.pipelines.consts import (
Expand Down Expand Up @@ -48,7 +50,7 @@ def with_python_base(context: PipelineContext, python_image_name: str = "python:
.from_(python_image_name)
.with_mounted_cache("/root/.cache/pip", pip_cache)
.with_mounted_directory("/tools", context.get_repo_dir("tools", include=["ci_credentials", "ci_common_utils"]))
.with_exec(["pip", "install", "--upgrade", "pip"])
.with_exec(["pip", "install", "pip==23.1.2"])
)

return base_container
Expand Down Expand Up @@ -99,6 +101,47 @@ def with_python_package(
return container


async def find_local_python_dependencies(context: PipelineContext, package_source_code_path: str) -> Tuple[List[str], List[str]]:
    """Retrieve the list of local dependencies of a python package source code.

    Local dependencies are detected in two places: ``file://`` direct
    references written by ``setup.py egg_info`` into the generated
    requires.txt, and editable (``-e .``) entries in requirements.txt.

    Args:
        context (PipelineContext): The current pipeline context.
        package_source_code_path (str): Path to the package source code in the airbyte repo.

    Returns:
        Tuple[List[str], List[str]]: A tuple containing the list of local dependencies found in setup.py and requirements.txt.
    """
    base_environment = with_python_base(context)
    container = with_python_package(context, base_environment, package_source_code_path)

    # Local dependencies declared in setup.py: run egg_info, locate the
    # generated requires.txt, then scan it for file:// requirements.
    setup_dependency_paths: List[str] = []
    if await get_file_contents(container, "setup.py"):
        egg_info_container = container.with_exec(["python", "setup.py", "egg_info"])
        egg_info_output = await egg_info_container.stdout()
        requires_txt_path = None
        for output_line in egg_info_output.split("\n"):
            if output_line.startswith("writing requirements to"):
                requires_txt_path = output_line.replace("writing requirements to", "").strip()
                break
        if requires_txt_path is not None:
            requires_txt = await egg_info_container.file(requires_txt_path).contents()
            for requirement in requires_txt.split("\n"):
                if "file://" in requirement:
                    # Strip the file:/// scheme to keep a repo-relative-style path.
                    if (match := re.search(r"file:///(.+)", requirement)) is not None:
                        setup_dependency_paths.append(match.group(1))

    # Local dependencies declared as editable installs in requirements.txt.
    requirements_dependency_paths: List[str] = []
    if requirements_txt := await get_file_contents(container, "requirements.txt"):
        for requirement_line in requirements_txt.split("\n"):
            if requirement_line.startswith("-e ."):
                dependency_path = Path(package_source_code_path + "/" + requirement_line[3:]).resolve()
                requirements_dependency_paths.append(str(dependency_path.relative_to(Path.cwd())))
    return setup_dependency_paths, requirements_dependency_paths


async def with_installed_python_package(
context: PipelineContext,
python_environment: Container,
Expand All @@ -118,20 +161,19 @@ async def with_installed_python_package(
Returns:
Container: A python environment container with the python package installed.
"""
install_local_requirements_cmd = ["python", "-m", "pip", "install", "-r", "requirements.txt"]
install_requirements_cmd = ["python", "-m", "pip", "install", "-r", "requirements.txt"]
install_connector_package_cmd = ["python", "-m", "pip", "install", "."]

container = with_python_package(context, python_environment, package_source_code_path, exclude=exclude)
if requirements_txt := await get_file_contents(container, "requirements.txt"):
for line in requirements_txt.split("\n"):
if line.startswith("-e ."):
local_dependency_path = package_source_code_path + "/" + line[3:]
container = container.with_mounted_directory(
"/" + local_dependency_path, context.get_repo_dir(local_dependency_path, exclude=DEFAULT_PYTHON_EXCLUDE)
)
container = container.with_exec(install_local_requirements_cmd)

container = container.with_exec(install_connector_package_cmd)
setup_dependencies, requirements_dependencies = await find_local_python_dependencies(context, package_source_code_path)
for dependency_directory in setup_dependencies + requirements_dependencies:
container = container.with_mounted_directory("/" + dependency_directory, context.get_repo_dir(dependency_directory))

if await get_file_contents(container, "setup.py"):
container = container.with_exec(install_connector_package_cmd)
if await get_file_contents(container, "requirements.txt"):
container = container.with_exec(install_requirements_cmd)

if additional_dependency_groups:
container = container.with_exec(
Expand Down Expand Up @@ -677,6 +719,9 @@ async def get_cdk_version_from_python_connector(python_connector: Container) ->


async def with_airbyte_python_connector(context: ConnectorContext, build_platform: Platform) -> Container:
if context.connector.technical_name == "source-file-secure":
return await with_airbyte_python_connector_full_dagger(context, build_platform)

pip_cache: CacheVolume = context.dagger_client.cache_volume("pip_cache")
connector_container = (
context.dagger_client.container(platform=build_platform)
Expand Down Expand Up @@ -732,35 +777,43 @@ async def finalize_build(context: ConnectorContext, connector_container: Contain
return connector_container.with_entrypoint(original_entrypoint)


# This function is not used at the moment as we decided to use Python connectors dockerfile instead of building it with dagger.
# Some python connectors use alpine base image, other use debian... We should unify this.
def with_airbyte_python_connector_full_dagger(context: ConnectorContext, build_platform: Platform) -> Container:
async def with_airbyte_python_connector_full_dagger(context: ConnectorContext, build_platform: Platform) -> Container:
setup_dependencies_to_mount, _ = await find_local_python_dependencies(context, str(context.connector.code_directory))

pip_cache: CacheVolume = context.dagger_client.cache_volume("pip_cache")
base = context.dagger_client.container(platform=build_platform).from_("python:3.9.11-alpine3.15")
base = context.dagger_client.container(platform=build_platform).from_("python:3.9-slim")
snake_case_name = context.connector.technical_name.replace("-", "_")
entrypoint = ["python", "/airbyte/integration_code/main.py"]
builder = (
base.with_workdir("/airbyte/integration_code")
.with_exec(["apk", "--no-cache", "upgrade"])
.with_env_variable("DAGGER_BUILD", "True")
.with_exec(["apt-get", "update"])
.with_mounted_cache("/root/.cache/pip", pip_cache)
.with_exec(["pip", "install", "--upgrade", "pip"])
.with_exec(["apk", "--no-cache", "add", "tzdata", "build-base"])
.with_exec(["apt-get", "install", "-y", "tzdata"])
.with_file("setup.py", context.get_connector_dir(include="setup.py").file("setup.py"))
.with_exec(["pip", "install", "--prefix=/install", "."])
)
return (

for dependency_path in setup_dependencies_to_mount:
in_container_dependency_path = f"/local_dependencies/{Path(dependency_path).name}"
builder = builder.with_mounted_directory(in_container_dependency_path, context.get_repo_dir(dependency_path))

builder = builder.with_exec(["pip", "install", "--prefix=/install", "."])

connector_container = (
base.with_workdir("/airbyte/integration_code")
.with_directory("/usr/local", builder.directory("/install"))
.with_file("/usr/localtime", builder.file("/usr/share/zoneinfo/Etc/UTC"))
.with_new_file("/etc/timezone", "Etc/UTC")
.with_exec(["apk", "--no-cache", "add", "bash"])
.with_exec(["apt-get", "install", "-y", "bash"])
.with_file("main.py", context.get_connector_dir(include="main.py").file("main.py"))
.with_directory(snake_case_name, context.get_connector_dir(include=snake_case_name).directory(snake_case_name))
.with_env_variable("AIRBYTE_ENTRYPOINT", " ".join(entrypoint))
.with_entrypoint(entrypoint)
.with_label("io.airbyte.version", context.metadata["dockerImageTag"])
.with_label("io.airbyte.name", context.metadata["dockerRepository"])
)
return await finalize_build(context, connector_container)


def with_crane(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import anyio
from ci_connector_ops.pipelines.actions import environments
from ci_connector_ops.pipelines.utils import get_file_contents
from dagger import Directory

if TYPE_CHECKING:
Expand All @@ -22,11 +23,11 @@ async def mask_secrets_in_gha_logs(ci_credentials_with_downloaded_secrets: Conta
We're not doing it directly from the ci_credentials tool because its stdout is wrapped around the dagger logger,
And GHA will only interpret lines starting with ::add-mask:: as secrets to mask.
"""
secrets_to_mask = await ci_credentials_with_downloaded_secrets.file("/tmp/secrets_to_mask.txt").contents()
for secret_to_mask in secrets_to_mask.splitlines():
# We print directly to stdout because the GHA runner will mask only if the log line starts with "::add-mask::"
# If we use the dagger logger, or context logger, the log line will start with other stuff and will not be masked
print(f"::add-mask::{secret_to_mask}")
if secrets_to_mask := await get_file_contents(ci_credentials_with_downloaded_secrets, "/tmp/secrets_to_mask.txt"):
for secret_to_mask in secrets_to_mask.splitlines():
# We print directly to stdout because the GHA runner will mask only if the log line starts with "::add-mask::"
# If we use the dagger logger, or context logger, the log line will start with other stuff and will not be masked
print(f"::add-mask::{secret_to_mask}")


async def download(context: ConnectorContext, gcp_gsm_env_variable_name: str = "GCP_GSM_CREDENTIALS") -> Directory:
Expand Down
Loading

0 comments on commit c2b7043

Please sign in to comment.