Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
edgao committed Dec 8, 2021
1 parent adf72a2 commit 7a9e31c
Show file tree
Hide file tree
Showing 7 changed files with 82 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ dependencies {
implementation 'com.fasterxml.jackson.core:jackson-databind'

testImplementation "org.testcontainers:postgresql:1.15.3"
testImplementation "org.mockito:mockito-inline:4.1.0"

integrationTestJavaImplementation project(':airbyte-integrations:bases:standard-destination-test')
integrationTestJavaImplementation "org.testcontainers:postgresql:1.15.3"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
import io.airbyte.protocol.models.ConfiguredAirbyteStream;
import io.airbyte.protocol.models.DestinationSyncMode;

// TODO create new S3StreamCopierFactory
/**
* See {@link S3StreamCopierFactory} instead.
*/
@Deprecated
public abstract class LegacyS3StreamCopierFactory implements StreamCopierFactory<S3DestinationConfig> {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,46 +6,53 @@

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyInt;
import static org.mockito.Mockito.RETURNS_DEEP_STUBS;
import static org.mockito.Mockito.clearInvocations;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.mockConstructionWithAnswer;
import static org.mockito.Mockito.verify;

import alex.mojaki.s3upload.StreamTransferManager;
import com.amazonaws.services.s3.AmazonS3Client;
import io.airbyte.db.jdbc.JdbcDatabase;
import io.airbyte.integrations.destination.ExtendedNameTransformer;
import io.airbyte.integrations.destination.jdbc.SqlOperations;
import io.airbyte.integrations.destination.s3.S3DestinationConfig;
import io.airbyte.protocol.models.AirbyteRecordMessage;
import io.airbyte.protocol.models.DestinationSyncMode;
import java.util.List;
import java.util.UUID;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.platform.commons.util.ExceptionUtils;
import org.mockito.MockedConstruction;

/**
* Tests to help define what the legacy S3 stream copier did.
* <p>
* Somewhat sketchily verifies what the AmazonS3Client does, even though the stream copier only actually interacts with it via StreamTransferManager
* instances. The interactions are mostly obvious enough that this feels fine.
* <p>
* Does not verify SQL operations, as they're fairly transparent.
*/
public class LegacyS3StreamCopierTest {

public static final int PART_SIZE = 5;

private AmazonS3Client s3Client;
private JdbcDatabase db;
private SqlOperations sqlOperations;
private LegacyS3StreamCopier copier;

private MockedConstruction<StreamTransferManager> streamTransferManagerMockedConstruction;

@BeforeEach
public void setup() {
s3Client = mock(AmazonS3Client.class, RETURNS_DEEP_STUBS);
db = mock(JdbcDatabase.class);
sqlOperations = mock(SqlOperations.class);

streamTransferManagerMockedConstruction = mockConstructionWithAnswer(StreamTransferManager.class, RETURNS_DEEP_STUBS);

copier = new LegacyS3StreamCopier(
"fake-staging-folder",
DestinationSyncMode.OVERWRITE,
Expand All @@ -60,6 +67,7 @@ public void setup() {
"fake-region",
"fake-access-key-id",
"fake-secret-access-key",
PART_SIZE,
null
),
new ExtendedNameTransformer(),
Expand All @@ -74,45 +82,66 @@ public void copyS3CsvFileIntoTable(
final S3DestinationConfig s3Config) {
throw new UnsupportedOperationException("not implemented");
}

};
}

@AfterEach
public void teardown() {
// Release the static constructor mock on StreamTransferManager so that later tests
// (and any other test classes in the same JVM) construct real instances again.
streamTransferManagerMockedConstruction.close();
}

@Test
public void createSequentialStagingFiles_when_multipleFilesRequested() {
// Each file will contain multiple parts, so the first MAX_PARTS_PER_FILE will all go into the same file
for (var i = 0; i < LegacyS3StreamCopier.MAX_PARTS_PER_FILE; i++) {
final String file1 = copier.prepareStagingFile();
assertEquals("fake-staging-folder/fake-schema/fake-stream_00000", file1, "preparing file number " + i);
// When we call prepareStagingFile() the first time, it should create exactly one upload manager
final String firstFile = copier.prepareStagingFile();
assertEquals("fake-staging-folder/fake-schema/fake-stream_00000", firstFile);
final List<StreamTransferManager> firstManagers = streamTransferManagerMockedConstruction.constructed();
final StreamTransferManager firstManager = firstManagers.get(0);
verify(firstManager.numUploadThreads(anyInt()).queueCapacity(anyInt())).partSize(PART_SIZE);
assertEquals(1, firstManagers.size(), "There were actually " + firstManagers.size() + " upload managers");

// Each file will contain multiple parts, so the first MAX_PARTS_PER_FILE will all go into the same file (i.e. we should not start more uploads)
// We've already called prepareStagingFile() once, so only go to MAX_PARTS_PER_FILE - 1
for (var i = 0; i < LegacyS3StreamCopier.MAX_PARTS_PER_FILE - 1; i++) {
final String existingFile = copier.prepareStagingFile();
assertEquals("fake-staging-folder/fake-schema/fake-stream_00000", existingFile, "preparing file number " + i);
final int streamManagerCount = streamTransferManagerMockedConstruction.constructed().size();
assertEquals(1, streamManagerCount, "There were actually " + streamManagerCount + " upload managers");
}
verify(s3Client).initiateMultipartUpload(any());
clearInvocations(s3Client);

final String file2 = copier.prepareStagingFile();
assertEquals("fake-staging-folder/fake-schema/fake-stream_00001", file2);
verify(s3Client).initiateMultipartUpload(any());
// Now that we've hit the MAX_PARTS_PER_FILE, we should start a new upload
final String secondFile = copier.prepareStagingFile();
assertEquals("fake-staging-folder/fake-schema/fake-stream_00001", secondFile);
final List<StreamTransferManager> secondManagers = streamTransferManagerMockedConstruction.constructed();
final StreamTransferManager secondManager = secondManagers.get(1);
verify(secondManager.numUploadThreads(anyInt()).queueCapacity(anyInt())).partSize(PART_SIZE);
assertEquals(2, secondManagers.size(), "There were actually " + secondManagers.size() + " upload managers");
}

@Test
public void closesS3Upload_when_stagingUploaderClosedSuccessfully() throws Exception {
  // Start a single staging upload; the mocked StreamTransferManager construction records it.
  copier.prepareStagingFile();

  copier.closeStagingUploader(false);

  // A successful close should complete the underlying multipart upload via the manager.
  final List<StreamTransferManager> managers = streamTransferManagerMockedConstruction.constructed();
  final StreamTransferManager manager = managers.get(0);
  verify(manager).numUploadThreads(10);
  verify(manager).complete();
}

@Test
public void closesS3Upload_when_stagingUploaderClosedFailingly() throws Exception {
  final String file = copier.prepareStagingFile();
  // This is needed to trick the StreamTransferManager into thinking it has data that needs to be written.
  copier.write(UUID.randomUUID(), new AirbyteRecordMessage().withEmittedAt(84L), file);

  // TODO: figure out why aborting the upload surfaces as an InterruptedException
  final RuntimeException exception = assertThrows(RuntimeException.class, () -> copier.closeStagingUploader(true));

  // the wrapping chain is RuntimeException -> ExecutionException -> RuntimeException -> InterruptedException
  assertEquals(InterruptedException.class, exception.getCause().getCause().getCause().getClass(),
      "Original exception: " + ExceptionUtils.readStackTrace(exception));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@
"description": "The name of the staging S3 bucket to use if utilising a COPY strategy. COPY is recommended for production workloads for better speed and scalability. See <a href=\"https://docs.aws.amazon.com/redshift/latest/dg/c_loading-data-best-practices.html\">AWS docs</a> for more details.",
"examples": ["airbyte.staging"]
},
"s3_bucket_path": {
"title": "S3 Bucket Path",
"type": "string",
"description": "The directory under the S3 bucket where data will be written.",
"examples": ["data_sync/test"]
},
"s3_bucket_region": {
"title": "S3 Bucket Region",
"type": "string",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,4 +135,17 @@ public AmazonS3 getS3Client() {
.withCredentials(new AWSStaticCredentialsProvider(awsCreds))
.build();
}

/**
 * Builds a copy of this destination config that is identical in every field except the
 * format config, which is replaced by the given value.
 *
 * @param formatConfig the format config to use in the copy
 * @return a new {@link S3DestinationConfig} sharing all other settings with this instance
 */
public S3DestinationConfig cloneWithFormatConfig(final S3FormatConfig formatConfig) {
  return new S3DestinationConfig(
      endpoint, bucketName, bucketPath, bucketRegion,
      accessKeyId, secretAccessKey, partSize,
      formatConfig);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,15 @@ public String getValue() {
private final Long partSize;

/**
 * Parses the CSV format options out of the raw connector config node.
 * Delegates to the canonical constructor; {@code this(...)} must be the first statement,
 * so no field assignments may precede it.
 *
 * @param formatConfig JSON node holding "flattening" and optionally the part-size option
 */
public S3CsvFormatConfig(final JsonNode formatConfig) {
  this(
      Flattening.fromValue(formatConfig.get("flattening").asText()),
      // part size is optional; null means "use the default"
      formatConfig.get(PART_SIZE_MB_ARG_NAME) != null ? formatConfig.get(PART_SIZE_MB_ARG_NAME).asLong() : null);
}

/**
 * Canonical constructor.
 *
 * @param flattening how nested records should be flattened into CSV columns
 * @param partSize multipart upload part size in MB, or null for the default
 */
public S3CsvFormatConfig(final Flattening flattening, final Long partSize) {
  this.flattening = flattening;
  this.partSize = partSize;
}

@Override
Expand Down
3 changes: 2 additions & 1 deletion airbyte-integrations/connectors/source-jdbc/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ dependencies {
testFixturesImplementation 'org.junit.jupiter:junit-jupiter-api:5.4.2'
testFixturesImplementation 'org.junit.jupiter:junit-jupiter-params:5.4.2'
testFixturesImplementation group: 'org.mockito', name: 'mockito-junit-jupiter', version: '3.4.6'
testFixturesImplementation group: 'org.mockito', name: 'mockito-inline', version: '4.1.0'

implementation files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs)
}
}

0 comments on commit 7a9e31c

Please sign in to comment.