Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

move basic normalization to connectors_base build #7068

Merged
merged 11 commits into from
Oct 16, 2021
1 change: 1 addition & 0 deletions airbyte-integrations/bases/base-normalization/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,5 @@ WORKDIR /airbyte
ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh"
ENTRYPOINT ["/airbyte/entrypoint.sh"]

LABEL io.airbyte.version=0.1.56
LABEL io.airbyte.name=airbyte/normalization
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
version: "3.7"

services:
normalization:
image: airbyte/normalization:${VERSION}
build:
dockerfile: Dockerfile
context: .
labels:
io.airbyte.git-revision: ${GIT_REVISION}
normalization-mssql:
image: airbyte/normalization-mssql:${VERSION}
build:
dockerfile: mssql.Dockerfile
context: .
labels:
io.airbyte.git-revision: ${GIT_REVISION}
normalization-mysql:
image: airbyte/normalization-mysql:${VERSION}
build:
dockerfile: mysql.Dockerfile
context: .
labels:
io.airbyte.git-revision: ${GIT_REVISION}
normalization-oracle:
image: airbyte/normalization-oracle:${VERSION}
build:
dockerfile: oracle.Dockerfile
context: .
labels:
io.airbyte.git-revision: ${GIT_REVISION}
12 changes: 12 additions & 0 deletions airbyte-integrations/bases/base-normalization/docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
version: "3.7"

# this file only exists so that we can easily check that all of these images exist in docker hub in check_images_exist.sh
services:
normalization:
image: airbyte/normalization:${VERSION}
normalization-mssql:
image: airbyte/normalization-mssql:${VERSION}
normalization-mysql:
image: airbyte/normalization-mysql:${VERSION}
normalization-oracle:
image: airbyte/normalization-oracle:${VERSION}
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,7 @@ public void specNormalizationValueShouldBeCorrect() throws Exception {
assertEquals(normalizationFromSpec, supportsNormalization());
boolean normalizationRunnerFactorySupportsDestinationImage;
try {
NormalizationRunnerFactory.create(getImageName(), processFactory, "dev");
NormalizationRunnerFactory.create(getImageName(), processFactory);
normalizationRunnerFactorySupportsDestinationImage = true;
} catch (final IllegalStateException e) {
normalizationRunnerFactorySupportsDestinationImage = false;
Expand Down Expand Up @@ -724,8 +724,7 @@ public void testCustomDbtTransformations() throws Exception {

final DbtTransformationRunner runner = new DbtTransformationRunner(processFactory, NormalizationRunnerFactory.create(
getImageName(),
processFactory,
"dev"));
processFactory));
runner.start();
final Path transformationRoot = Files.createDirectories(jobRoot.resolve("transform"));
final OperatorDbt dbtConfig = new OperatorDbt()
Expand Down Expand Up @@ -791,8 +790,7 @@ void testCustomDbtTransformationsFailure() throws Exception {

final DbtTransformationRunner runner = new DbtTransformationRunner(processFactory, NormalizationRunnerFactory.create(
getImageName(),
processFactory,
"dev"));
processFactory));
runner.start();
final Path transformationRoot = Files.createDirectories(jobRoot.resolve("transform"));
final OperatorDbt dbtConfig = new OperatorDbt()
Expand Down Expand Up @@ -979,8 +977,7 @@ private List<AirbyteMessage> runSync(

final NormalizationRunner runner = NormalizationRunnerFactory.create(
getImageName(),
processFactory,
"dev");
processFactory);
runner.start();
final Path normalizationRoot = Files.createDirectories(jobRoot.resolve("normalize"));
if (!runner.normalize(JOB_ID, JOB_ATTEMPT, normalizationRoot, destinationConfig.getDestinationConnectionConfiguration(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
public class NormalizationRunnerFactory {

public static final String BASE_NORMALIZATION_IMAGE_NAME = "airbyte/normalization";
public static final String NORMALIZATION_VERSION = "0.1.56";

static final Map<String, ImmutablePair<String, DefaultNormalizationRunner.DestinationType>> NORMALIZATION_MAPPING =
ImmutableMap.<String, ImmutablePair<String, DefaultNormalizationRunner.DestinationType>>builder()
Expand All @@ -28,14 +29,14 @@ public class NormalizationRunnerFactory {
.put("airbyte/destination-snowflake", ImmutablePair.of(BASE_NORMALIZATION_IMAGE_NAME, DestinationType.SNOWFLAKE))
.build();

public static NormalizationRunner create(final String imageName, final ProcessFactory processFactory, final String airbyteVersion) {
public static NormalizationRunner create(final String imageName, final ProcessFactory processFactory) {
final String imageNameWithoutTag = imageName.split(":")[0];
if (NORMALIZATION_MAPPING.containsKey(imageNameWithoutTag)) {
final var valuePair = NORMALIZATION_MAPPING.get(imageNameWithoutTag);
return new DefaultNormalizationRunner(
valuePair.getRight(),
processFactory,
String.format("%s:%s", valuePair.getLeft(), airbyteVersion));
String.format("%s:%s", valuePair.getLeft(), NORMALIZATION_VERSION));
} else {
throw new IllegalStateException(
String.format("Requested normalization for %s, but it is not included in the normalization mappings.", imageName));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -310,8 +310,7 @@ private CheckedSupplier<Worker<NormalizationInput, Void>, Exception> getWorkerFa
Math.toIntExact(jobRunConfig.getAttemptId()),
NormalizationRunnerFactory.create(
destinationLauncherConfig.getDockerImage(),
processFactory,
airbyteVersion),
processFactory),
workerEnvironment);
}

Expand Down Expand Up @@ -393,8 +392,7 @@ private CheckedSupplier<Worker<OperatorDbtInput, Void>, Exception> getWorkerFact
new DbtTransformationRunner(
processFactory, NormalizationRunnerFactory.create(
destinationLauncherConfig.getDockerImage(),
processFactory,
airbyteVersion)));
processFactory)));
}

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ void testMappings() {
for (final Entry<String, ImmutablePair<String, DestinationType>> entry : NormalizationRunnerFactory.NORMALIZATION_MAPPING.entrySet()) {
assertEquals(entry.getValue().getValue(),
((DefaultNormalizationRunner) NormalizationRunnerFactory.create(
String.format("%s:0.1.0", entry.getKey()), processFactory, "test")).getDestinationType());
String.format("%s:0.1.0", entry.getKey()), processFactory)).getDestinationType());
}
assertThrows(IllegalStateException.class,
() -> NormalizationRunnerFactory.create("airbyte/destination-csv:0.1.0", processFactory, "test"));
() -> NormalizationRunnerFactory.create("airbyte/destination-csv:0.1.0", processFactory));
}

}
1 change: 0 additions & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,6 @@ allprojects {
":airbyte-config:init",
":airbyte-db:lib",
":airbyte-migration",
":airbyte-integrations:bases:base-normalization",
":airbyte-scheduler:app",
":airbyte-workers",
":airbyte-server",
Expand Down
28 changes: 0 additions & 28 deletions docker-compose.build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,31 +50,3 @@ services:
context: airbyte-migration
labels:
io.airbyte.git-revision: ${GIT_REVISION}
normalization:
image: airbyte/normalization:${VERSION}
build:
dockerfile: Dockerfile
context: airbyte-integrations/bases/base-normalization
labels:
io.airbyte.git-revision: ${GIT_REVISION}
normalization-mssql:
image: airbyte/normalization-mssql:${VERSION}
build:
dockerfile: mssql.Dockerfile
context: airbyte-integrations/bases/base-normalization
labels:
io.airbyte.git-revision: ${GIT_REVISION}
normalization-mysql:
image: airbyte/normalization-mysql:${VERSION}
build:
dockerfile: mysql.Dockerfile
context: airbyte-integrations/bases/base-normalization
labels:
io.airbyte.git-revision: ${GIT_REVISION}
normalization-oracle:
image: airbyte/normalization-oracle:${VERSION}
build:
dockerfile: oracle.Dockerfile
context: airbyte-integrations/bases/base-normalization
labels:
io.airbyte.git-revision: ${GIT_REVISION}
Comment on lines -53 to -80
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ChristopheDuong why were these added here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since it is not possible to “edit” and choose what normalization version you want like you can with connectors, I switched normalization version tags to follow airbyte core versions instead as discussed in this thread (I also discussed it with people separately and overall people were supportive of the switch) https://airbytehq.slack.com/archives/C019WEENQRM/p1633710746415900

See also comment making it part of composeBuild: #2054 (comment)

Overall it’d make it easier to publish new normalization images (when releasing airbyte) instead of doing it whenever a PR is merged…. (the docker image is not useable until there is a release of airbyte anyway)

And now that we have multiple docker images for normalization to publish, we'd have to include them in the docker-compose-build.yaml to make them as part of airbyte release?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could revert back but then we need to adapt the publish script and the DefaultNormalizationRunnerFactory too

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe a good way forward is to split normalisation out to use it's own versioning and inject this version by env var.

After this we split the builds so normalisation is a separate build from the platform. In this sense, we have 3 build paths 1) connectors 2) normalisation 3) platform. This will allow the platform team to remove Python from our/their deps.

To simplify things, we can continue to release normalisation with the current OSS release. We can also continue to pin to the Airbyte version. There is some unnecessary work, but it's mostly automated so no sweat off our backs.

We can also start with a single global normalisation version. We can add individual connector normalisation versions as we run into those cases.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now, I'm reverting back to how we had it that normalization has its own version that is de coupled from platform. I think the next thing to figure out is how we want to package normalization in the future. It seems like either packaging mysql normalization in the mysql destination container or at least having that container specify the name of the container that should be used to normalize it seem like 2 reasonable paths forward but those are just idea, open to others.

4 changes: 1 addition & 3 deletions settings.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,12 @@ include ':airbyte-notification' // transitively used by airbyte-workers.
include ':airbyte-scheduler:models' // transitively used by airbyte-workers.
include ':airbyte-scheduler:persistence' // used by airbyte-workers.

// platformm
// platform
if(!System.getenv().containsKey("SUB_BUILD") || System.getenv().get("SUB_BUILD") == "PLATFORM") {
include ':airbyte-cli'
include ':airbyte-e2e-testing'
include ':airbyte-migration'
include ':airbyte-integrations:bases:base-normalization'
include ':airbyte-oauth'
include ':airbyte-integrations:bases:airbyte-protocol'
include ':airbyte-scheduler:app'
include ':airbyte-scheduler:client'
include ':airbyte-server'
Expand Down
10 changes: 10 additions & 0 deletions tools/bin/check_images_exist.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,15 @@ checkPlatformImages() {
echo "Success! All platform images exist!"
}

checkNormalizationImages() {
# the only way to know what version of normalization the platform is using is looking in NormalizationRunnerFactory.
local image_version;
image_version=$(cat airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java | grep 'NORMALIZATION_VERSION =' | cut -d"=" -f2 | sed 's:;::' | sed -e 's:"::g' | sed -e 's:[[:space:]]::g')
echo "Checking normalization images with version $image_version exist..."
VERSION=$image_version docker-compose -f airbyte-integrations/bases/base-normalization/docker-compose.yaml pull || exit 1
echo "Success! All normalization images exist!"
}

checkConnectorImages() {
echo "Checking connector images exist..."

Expand Down Expand Up @@ -44,6 +53,7 @@ main() {
echo "checking images for: $SUBSET"

[[ "$SUBSET" =~ ^(all|platform)$ ]] && checkPlatformImages
[[ "$SUBSET" =~ ^(all|platform|connectors)$ ]] && checkNormalizationImages
[[ "$SUBSET" =~ ^(all|connectors)$ ]] && checkConnectorImages

echo "Image check complete."
Expand Down
33 changes: 25 additions & 8 deletions tools/integrations/manage.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ _check_tag_exists() {
DOCKER_CLI_EXPERIMENTAL=enabled docker manifest inspect "$1" > /dev/null
}

_error_if_tag_exists() {
if _check_tag_exists "$1"; then
error "You're trying to push a version that was already released ($1). Make sure you bump it up."
fi
}

cmd_scaffold() {
echo "Scaffolding connector"
(
Expand Down Expand Up @@ -74,6 +80,9 @@ cmd_publish() {
publish_spec_to_cache=false
fi

# before we start working sanity check that this version has not been published yet, so that we do not spend a lot of
# time building, running tests to realize this version is a duplicate.
_error_if_tag_exists "$versioned_image"

cmd_build "$path" "$run_tests"

Expand All @@ -86,16 +95,24 @@ cmd_publish() {
echo "$versioned_image $versioned_image"
echo "latest_image $latest_image"

docker tag "$image_name:dev" "$versioned_image"
docker tag "$image_name:dev" "$latest_image"
# in case curing the build / tests someone this version has been published.
_error_if_tag_exists "$versioned_image"

if _check_tag_exists "$versioned_image"; then
error "You're trying to push a version that was already released ($versioned_image). Make sure you bump it up."
fi
if [[ "airbyte/normalization" == "${image_name}" ]]; then
echo "Publishing normalization images (version: $versioned_image)"
GIT_REVISION=$(git rev-parse HEAD)
VERSION=$image_version GIT_REVISION=$GIT_REVISION docker-compose -f airbyte-integrations/bases/base-normalization/docker-compose.build.yaml build
VERSION=$image_version GIT_REVISION=$GIT_REVISION docker-compose -f airbyte-integrations/bases/base-normalization/docker-compose.build.yaml push
VERSION=latest GIT_REVISION=$GIT_REVISION docker-compose -f airbyte-integrations/bases/base-normalization/docker-compose.build.yaml build
VERSION=latest GIT_REVISION=$GIT_REVISION docker-compose -f airbyte-integrations/bases/base-normalization/docker-compose.build.yaml push
else
docker tag "$image_name:dev" "$versioned_image"
docker tag "$image_name:dev" "$latest_image"

echo "Publishing new version ($versioned_image)"
docker push "$versioned_image"
docker push "$latest_image"
echo "Publishing new version ($versioned_image)"
docker push "$versioned_image"
docker push "$latest_image"
fi

if [[ "true" == "${publish_spec_to_cache}" ]]; then
echo "Publishing and writing to spec cache."
Expand Down