Merge branch 'master' into ddavydov/3398-oncall-bugfix
davydov-d authored Nov 3, 2023
2 parents 2220237 + 0f68752 commit e4a1eae
Showing 240 changed files with 2,884 additions and 3,511 deletions.
1 change: 1 addition & 0 deletions airbyte-cdk/java/airbyte-cdk/README.md
@@ -156,6 +156,7 @@ MavenLocal debugging steps:

| Version | Date | Pull Request | Subject |
|:--------|:-----------|:-----------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------|
| 0.4.0 | 2023-11-02 | [\#32050](https://github.com/airbytehq/airbyte/pull/32050) | Add 's3-destinations' CDK module. |
| 0.3.0 | 2023-11-02 | [\#31983](https://github.com/airbytehq/airbyte/pull/31983) | Add deinterpolation feature to AirbyteExceptionHandler. |
| 0.2.4 | 2023-10-31 | [\#31807](https://github.com/airbytehq/airbyte/pull/31807) | Handle case of debezium update and delete of records in mongodb. |
| 0.2.3 | 2023-10-31 | [\#32022](https://github.com/airbytehq/airbyte/pull/32022) | Update Debezium version from 2.20 -> 2.4.0. |
27 changes: 0 additions & 27 deletions airbyte-cdk/java/airbyte-cdk/config-models-oss/build.gradle
@@ -37,30 +37,3 @@ jsonSchema2Pojo {
tasks.register('generate').configure {
dependsOn tasks.named('generateJsonSchema2Pojo')
}

test {
useJUnitPlatform {
excludeTags 'log4j2-config', 'logger-client'
}
testLogging {
events "passed", "skipped", "failed"
}
}

tasks.register('log4j2IntegrationTest', Test) {
useJUnitPlatform {
includeTags 'log4j2-config'
}
testLogging {
events "passed", "skipped", "failed"
}
}

tasks.register('logClientsIntegrationTest', Test) {
useJUnitPlatform {
includeTags 'logger-client'
}
testLogging {
events "passed", "skipped", "failed"
}
}
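For context, the deleted blocks split the suite by JUnit 5 tag: the default `test` task excluded the `log4j2-config` and `logger-client` tags, and each registered integration task included one of them. A minimal sketch of a test class the removed `log4j2IntegrationTest` task would have matched — the class and method names are hypothetical; only the tag value comes from the build file:

```java
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;

// Hypothetical test class; only the "log4j2-config" tag value is taken from the removed task.
@Tag("log4j2-config")
class Log4j2ConfigIntegrationTest {

  @Test
  void log4j2ConfigurationLoads() {
    // assertions against the log4j2 configuration would go here
  }

}
```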
16 changes: 0 additions & 16 deletions airbyte-cdk/java/airbyte-cdk/core/build.gradle
@@ -30,17 +30,13 @@ dependencies {
testImplementation project(':airbyte-cdk:java:airbyte-cdk:airbyte-commons-cli')
testImplementation project(':airbyte-cdk:java:airbyte-cdk:config-models-oss')

implementation ('com.github.airbytehq:json-avro-converter:1.1.0') { exclude group: 'ch.qos.logback', module: 'logback-classic'}

// SSH dependencies
implementation 'net.i2p.crypto:eddsa:0.3.0'

// First party test dependencies
testImplementation project(':airbyte-cdk:java:airbyte-cdk:airbyte-commons')
testImplementation testFixtures(project(':airbyte-cdk:java:airbyte-cdk:db-sources'))

implementation 'com.github.alexmojaki:s3-stream-upload:2.2.2'

testFixturesImplementation "org.hamcrest:hamcrest-all:1.3"

testImplementation libs.bundles.junit
@@ -63,7 +59,6 @@ dependencies {
implementation('com.google.cloud:google-cloud-bigquery:1.133.1')
implementation 'org.mongodb:mongodb-driver-sync:4.3.0'
implementation libs.postgresql
implementation ('org.apache.parquet:parquet-avro:1.12.3') { exclude group: 'org.slf4j', module: 'slf4j-log4j12'}

// testImplementation libs.junit.jupiter.api
implementation libs.hikaricp
@@ -93,22 +88,11 @@ dependencies {
testFixturesImplementation 'org.projectlombok:lombok:1.18.20'
testFixturesAnnotationProcessor 'org.projectlombok:lombok:1.18.20'

implementation ('org.apache.hadoop:hadoop-aws:3.3.3') { exclude group: 'org.slf4j', module: 'slf4j-log4j12'}
implementation ('org.apache.hadoop:hadoop-mapreduce-client-core:3.3.3') {exclude group: 'org.slf4j', module: 'slf4j-log4j12' exclude group: 'org.slf4j', module: 'slf4j-reload4j'}

testImplementation libs.junit.jupiter.system.stubs

implementation libs.jackson.annotations
implementation group: 'org.apache.logging.log4j', name: 'log4j-layout-template-json', version: '2.17.2'

implementation group: 'com.hadoop.gplcompression', name: 'hadoop-lzo', version: '0.4.20'

// parquet
implementation ('org.apache.hadoop:hadoop-common:3.3.3') {
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
exclude group: 'org.slf4j', module: 'slf4j-reload4j'
}

testImplementation 'org.apache.commons:commons-lang3:3.11'
testImplementation 'org.xerial.snappy:snappy-java:1.1.8.4'
testImplementation 'org.mockito:mockito-core:4.6.1'
@@ -4,7 +4,6 @@

package io.airbyte.cdk.integrations.destination.dest_state_lifecycle_manager;

-import com.amazonaws.util.StringUtils;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import io.airbyte.protocol.models.v0.AirbyteMessage;
@@ -50,7 +49,8 @@ public void addState(final AirbyteMessage message) {
Preconditions.checkArgument(message.getState().getType() == AirbyteStateType.STREAM);
final StreamDescriptor originalStreamId = message.getState().getStream().getStreamDescriptor();
final StreamDescriptor actualStreamId;
-if (StringUtils.isNullOrEmpty(originalStreamId.getNamespace())) {
+final String namespace = originalStreamId.getNamespace();
+if (namespace == null || namespace.isEmpty()) {
// If the state's namespace is null/empty, we need to be able to find it using the default namespace
// (because many destinations actually set records' namespace to the default namespace before
// they make it into this class).
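The inlined null/empty check replaces `com.amazonaws.util.StringUtils.isNullOrEmpty`, removing an AWS SDK import from this code path. A minimal sketch of the equivalence, with a hypothetical helper name:

```java
// Plain-JDK equivalent of the removed com.amazonaws.util.StringUtils.isNullOrEmpty;
// the helper name here is illustrative only.
static boolean isNullOrEmpty(final String s) {
  return s == null || s.isEmpty();
}
```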
@@ -20,10 +20,10 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.logging.log4j.Level;
-import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.core.LoggerContext;
import org.apache.logging.log4j.core.appender.OutputStreamAppender;
import org.apache.logging.log4j.core.config.Configuration;
+import org.apache.logging.log4j.core.config.Configurator;
import org.apache.logging.log4j.core.config.LoggerConfig;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
@@ -43,14 +43,15 @@ public class AirbyteLogMessageTemplateTest {
public static final String CONSOLE_JSON_APPENDER = "ConsoleJSONAppender";
private static OutputStreamAppender outputStreamAppender;
private static LoggerConfig rootLoggerConfig;
+private static LoggerContext loggerContext;

@BeforeAll
static void init() {
// We are creating a log appender with the same output pattern
// as the console json appender defined in this project's log4j2.xml file.
// We then attach this log appender with the LOGGER instance so that we can validate the logs
// produced by code and assert that it matches the expected format.
-final LoggerContext loggerContext = (LoggerContext) LogManager.getContext(false);
+loggerContext = Configurator.initialize(null, "log4j2.xml");
final Configuration configuration = loggerContext.getConfiguration();
rootLoggerConfig = configuration.getLoggerConfig("");

@@ -71,6 +72,7 @@ void setup() {
static void cleanUp() {
outputStreamAppender.stop();
rootLoggerConfig.removeAppender(OUTPUT_STREAM_APPENDER);
+loggerContext.close();
}

@Test
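The test now owns its logging setup explicitly: `Configurator.initialize(null, "log4j2.xml")` builds a `LoggerContext` from the named config file, and `cleanUp` closes it so later tests can reconfigure logging from scratch. A hedged sketch of that lifecycle outside JUnit, assuming a `log4j2.xml` on the classpath:

```java
import org.apache.logging.log4j.core.LoggerContext;
import org.apache.logging.log4j.core.config.Configurator;

class LoggerContextLifecycle {

  public static void main(final String[] args) {
    // Build an explicit context from a config file (assumes log4j2.xml is on the classpath).
    final LoggerContext ctx = Configurator.initialize(null, "log4j2.xml");
    try {
      ctx.getLogger(LoggerContextLifecycle.class.getName()).info("context is live");
    } finally {
      ctx.close(); // releases appenders; mirrors the test's cleanUp()
    }
  }

}
```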
4 changes: 1 addition & 3 deletions airbyte-cdk/java/airbyte-cdk/db-destinations/build.gradle
@@ -8,6 +8,7 @@ java {
dependencies {
// Depends on core CDK classes (OK 👍)
implementation project(':airbyte-cdk:java:airbyte-cdk:core')

compileOnly project(':airbyte-cdk:java:airbyte-cdk:typing-deduping')
testImplementation project(':airbyte-cdk:java:airbyte-cdk:typing-deduping')
testFixturesCompileOnly project(':airbyte-cdk:java:airbyte-cdk:acceptance-test-harness')
@@ -29,8 +30,6 @@ dependencies {

implementation ('com.github.airbytehq:json-avro-converter:1.1.0') { exclude group: 'ch.qos.logback', module: 'logback-classic'}

implementation 'com.github.alexmojaki:s3-stream-upload:2.2.2'

testFixturesImplementation "org.hamcrest:hamcrest-all:1.3"

implementation libs.bundles.junit
@@ -82,7 +81,6 @@ dependencies {
testFixturesImplementation 'org.projectlombok:lombok:1.18.20'
testFixturesAnnotationProcessor 'org.projectlombok:lombok:1.18.20'

implementation ('org.apache.hadoop:hadoop-aws:3.3.3') { exclude group: 'org.slf4j', module: 'slf4j-log4j12'}
implementation ('org.apache.hadoop:hadoop-common:3.3.3') {exclude group: 'org.slf4j', module: 'slf4j-log4j12' exclude group: 'org.slf4j', module: 'slf4j-reload4j'}
implementation ('org.apache.hadoop:hadoop-mapreduce-client-core:3.3.3') {exclude group: 'org.slf4j', module: 'slf4j-log4j12' exclude group: 'org.slf4j', module: 'slf4j-reload4j'}

@@ -36,7 +36,7 @@ public static OnStartFunction onStartFunction(final JdbcDatabase database,
final String dstTableName = writeConfig.getOutputTableName();
final String stageName = stagingOperations.getStageName(schema, dstTableName);
final String stagingPath =
-stagingOperations.getStagingPath(StagingConsumerFactory.RANDOM_CONNECTION_ID, schema, stream, writeConfig.getWriteDatetime());
+stagingOperations.getStagingPath(SerialStagingConsumerFactory.RANDOM_CONNECTION_ID, schema, stream, writeConfig.getWriteDatetime());

log.info("Preparing staging area in destination started for schema {} stream {}: target table: {}, stage: {}",
schema, stream, dstTableName, stagingPath);
@@ -83,7 +83,8 @@ public static FlushBufferFunction function(
final String schemaName = writeConfig.getOutputSchemaName();
final String stageName = stagingOperations.getStageName(schemaName, writeConfig.getOutputTableName());
final String stagingPath =
-stagingOperations.getStagingPath(StagingConsumerFactory.RANDOM_CONNECTION_ID, schemaName, writeConfig.getStreamName(),
+stagingOperations.getStagingPath(
+    SerialStagingConsumerFactory.RANDOM_CONNECTION_ID, schemaName, writeConfig.getStreamName(),
writeConfig.getWriteDatetime());
try (writer) {
writer.flush();
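A side note on the `try (writer)` form above: since Java 9, an effectively-final resource declared earlier can be named directly in a try-with-resources header, and it is closed when the block exits. A minimal standalone sketch:

```java
import java.io.StringWriter;

class TryWithExistingResource {

  public static void main(final String[] args) throws Exception {
    final StringWriter writer = new StringWriter();
    // Java 9+: the effectively-final 'writer' is listed by name;
    // writer.close() runs automatically at the end of the block.
    try (writer) {
      writer.write("flushed before close");
      writer.flush();
    }
  }

}
```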
New file: SerialStagingConsumerFactory.java
@@ -0,0 +1,141 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.cdk.integrations.destination.staging;

import static java.util.stream.Collectors.toList;

import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.base.Preconditions;
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
import io.airbyte.cdk.integrations.base.AirbyteMessageConsumer;
import io.airbyte.cdk.integrations.destination.NamingConventionTransformer;
import io.airbyte.cdk.integrations.destination.buffered_stream_consumer.BufferedStreamConsumer;
import io.airbyte.cdk.integrations.destination.jdbc.WriteConfig;
import io.airbyte.cdk.integrations.destination.record_buffer.BufferCreateFunction;
import io.airbyte.cdk.integrations.destination.record_buffer.SerializedBufferingStrategy;
import io.airbyte.integrations.base.destination.typing_deduping.ParsedCatalog;
import io.airbyte.integrations.base.destination.typing_deduping.StreamId;
import io.airbyte.integrations.base.destination.typing_deduping.TypeAndDedupeOperationValve;
import io.airbyte.integrations.base.destination.typing_deduping.TyperDeduper;
import io.airbyte.protocol.models.v0.AirbyteMessage;
import io.airbyte.protocol.models.v0.AirbyteStream;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
import io.airbyte.protocol.models.v0.DestinationSyncMode;
import java.util.List;
import java.util.UUID;
import java.util.function.Consumer;
import java.util.function.Function;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Uses both Factory and Consumer design pattern to create a single point of creation for consuming
* {@link AirbyteMessage} for processing
*/
public class SerialStagingConsumerFactory {

private static final Logger LOGGER = LoggerFactory.getLogger(SerialStagingConsumerFactory.class);

// using a random string here as a placeholder for the moment.
// This would avoid mixing data in the staging area between different syncs (especially if they
// manipulate streams with similar names)
// if we replaced the random connection id by the actual connection_id, we'd gain the opportunity to
// leverage data that was uploaded to stage
// in a previous attempt but failed to load to the warehouse for some reason (interrupted?) instead.
// This would also allow other programs/scripts
// to load (or reload backups?) in the connection's staging area to be loaded at the next sync.
private static final DateTime SYNC_DATETIME = DateTime.now(DateTimeZone.UTC);
public static final UUID RANDOM_CONNECTION_ID = UUID.randomUUID();

public AirbyteMessageConsumer create(final Consumer<AirbyteMessage> outputRecordCollector,
final JdbcDatabase database,
final StagingOperations stagingOperations,
final NamingConventionTransformer namingResolver,
final BufferCreateFunction onCreateBuffer,
final JsonNode config,
final ConfiguredAirbyteCatalog catalog,
final boolean purgeStagingData,
final TypeAndDedupeOperationValve typerDeduperValve,
final TyperDeduper typerDeduper,
final ParsedCatalog parsedCatalog,
final String defaultNamespace,
final boolean useDestinationsV2Columns) {
final List<WriteConfig> writeConfigs = createWriteConfigs(namingResolver, config, catalog, parsedCatalog, useDestinationsV2Columns);
return new BufferedStreamConsumer(
outputRecordCollector,
GeneralStagingFunctions.onStartFunction(database, stagingOperations, writeConfigs, typerDeduper),
new SerializedBufferingStrategy(
onCreateBuffer,
catalog,
SerialFlush.function(database, stagingOperations, writeConfigs, catalog, typerDeduperValve, typerDeduper)),
GeneralStagingFunctions.onCloseFunction(database, stagingOperations, writeConfigs, purgeStagingData, typerDeduper),
catalog,
stagingOperations::isValidData,
defaultNamespace);
}

/**
* Creates a list of all {@link WriteConfig} for each stream within a
* {@link ConfiguredAirbyteCatalog}. Each write config represents the configuration settings for
* writing to a destination connector
*
* @param namingResolver {@link NamingConventionTransformer} used to transform names that are
* acceptable by each destination connector
* @param config destination connector configuration parameters
* @param catalog {@link ConfiguredAirbyteCatalog} collection of configured
* {@link ConfiguredAirbyteStream}
* @return list of all write configs for each stream in a {@link ConfiguredAirbyteCatalog}
*/
private static List<WriteConfig> createWriteConfigs(final NamingConventionTransformer namingResolver,
final JsonNode config,
final ConfiguredAirbyteCatalog catalog,
final ParsedCatalog parsedCatalog,
final boolean useDestinationsV2Columns) {

return catalog.getStreams().stream().map(toWriteConfig(namingResolver, config, parsedCatalog, useDestinationsV2Columns)).collect(toList());
}

private static Function<ConfiguredAirbyteStream, WriteConfig> toWriteConfig(final NamingConventionTransformer namingResolver,
final JsonNode config,
final ParsedCatalog parsedCatalog,
final boolean useDestinationsV2Columns) {
return stream -> {
Preconditions.checkNotNull(stream.getDestinationSyncMode(), "Undefined destination sync mode");
final AirbyteStream abStream = stream.getStream();
final String streamName = abStream.getName();

final String outputSchema;
final String tableName;
if (useDestinationsV2Columns) {
final StreamId streamId = parsedCatalog.getStream(abStream.getNamespace(), streamName).id();
outputSchema = streamId.rawNamespace();
tableName = streamId.rawName();
} else {
outputSchema = getOutputSchema(abStream, config.get("schema").asText(), namingResolver);
tableName = namingResolver.getRawTableName(streamName);
}
final String tmpTableName = namingResolver.getTmpTableName(streamName);
final DestinationSyncMode syncMode = stream.getDestinationSyncMode();

final WriteConfig writeConfig =
new WriteConfig(streamName, abStream.getNamespace(), outputSchema, tmpTableName, tableName, syncMode, SYNC_DATETIME);
LOGGER.info("Write config: {}", writeConfig);

return writeConfig;
};
}

private static String getOutputSchema(final AirbyteStream stream,
final String defaultDestSchema,
final NamingConventionTransformer namingResolver) {
return stream.getNamespace() != null
? namingResolver.getNamespace(stream.getNamespace())
: namingResolver.getNamespace(defaultDestSchema);
}

}
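For orientation, a hypothetical call site for the new factory is sketched below. Every collaborator variable is an assumption standing in for whatever the destination connector supplies; only the parameter order mirrors the `create(...)` signature above:

```java
// Hypothetical wiring; all collaborator variables are assumed to exist in scope.
final SerialStagingConsumerFactory factory = new SerialStagingConsumerFactory();
final AirbyteMessageConsumer consumer = factory.create(
    outputRecordCollector,  // Consumer<AirbyteMessage> supplied by the platform
    database,               // JdbcDatabase for the warehouse
    stagingOperations,      // StagingOperations for the staging area
    namingResolver,         // NamingConventionTransformer
    onCreateBuffer,         // BufferCreateFunction
    config,                 // destination config as JsonNode
    catalog,                // ConfiguredAirbyteCatalog
    true,                   // purgeStagingData
    typerDeduperValve,      // TypeAndDedupeOperationValve
    typerDeduper,           // TyperDeduper
    parsedCatalog,          // ParsedCatalog
    "public",               // defaultNamespace (example value)
    false);                 // useDestinationsV2Columns
// The returned AirbyteMessageConsumer is then driven by the platform's message loop.
```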
@@ -11,7 +11,7 @@
import java.util.List;
import org.junit.jupiter.api.Test;

-class StagingConsumerFactoryTest {
+class SerialStagingConsumerFactoryTest {

@Test()
void detectConflictingStreams() {
23 changes: 1 addition & 22 deletions airbyte-cdk/java/airbyte-cdk/db-sources/build.gradle
@@ -23,14 +23,6 @@ java {
}
}


test {
testLogging {
// TODO: Remove this after debugging
showStandardStreams = true
}
}

project.configurations {
// From `base-debezium`:
testFixturesImplementation.extendsFrom implementation
@@ -79,12 +71,9 @@ dependencies {
testFixturesCompileOnly project(':airbyte-cdk:java:airbyte-cdk:config-models-oss')
testFixturesCompileOnly project(':airbyte-cdk:java:airbyte-cdk:init-oss')

implementation ('com.github.airbytehq:json-avro-converter:1.1.0') { exclude group: 'ch.qos.logback', module: 'logback-classic'}

implementation 'com.github.alexmojaki:s3-stream-upload:2.2.2'

testFixturesImplementation "org.hamcrest:hamcrest-all:1.3"


implementation libs.bundles.junit
// implementation libs.junit.jupiter.api
implementation libs.junit.jupiter.params
@@ -101,13 +90,7 @@ dependencies {

// Optional dependencies
// TODO: Change these to 'compileOnly' or 'testCompileOnly'
implementation 'com.azure:azure-storage-blob:12.12.0'
implementation('com.google.cloud:google-cloud-bigquery:1.133.1')
implementation 'org.mongodb:mongodb-driver-sync:4.3.0'
implementation libs.postgresql
implementation ('org.apache.parquet:parquet-avro:1.12.3') { exclude group: 'org.slf4j', module: 'slf4j-log4j12'}

// testImplementation libs.junit.jupiter.api
implementation libs.hikaricp
implementation libs.bundles.debezium.bundle

@@ -132,10 +115,6 @@ dependencies {
testFixturesImplementation 'org.projectlombok:lombok:1.18.20'
testFixturesAnnotationProcessor 'org.projectlombok:lombok:1.18.20'

implementation ('org.apache.hadoop:hadoop-aws:3.3.3') { exclude group: 'org.slf4j', module: 'slf4j-log4j12'}
implementation ('org.apache.hadoop:hadoop-common:3.3.3') {exclude group: 'org.slf4j', module: 'slf4j-log4j12' exclude group: 'org.slf4j', module: 'slf4j-reload4j'}
implementation ('org.apache.hadoop:hadoop-mapreduce-client-core:3.3.3') {exclude group: 'org.slf4j', module: 'slf4j-log4j12' exclude group: 'org.slf4j', module: 'slf4j-reload4j'}

testImplementation libs.junit.jupiter.system.stubs

// From `base-debezium`: