20 changes: 11 additions & 9 deletions azure-pipelines-20230430.yml
@@ -126,7 +126,7 @@ stages:
jobs:
- job: UT_FT_1
displayName: UT hudi-hadoop-common & UT FT client/spark-client
timeoutInMinutes: '90'
timeoutInMinutes: '120'
steps:
- task: Maven@4
displayName: maven install
@@ -180,7 +180,7 @@ stages:
displayName: Top 100 long-running testcases
- job: UT_FT_2
displayName: FTA hudi-spark
timeoutInMinutes: '180'
timeoutInMinutes: '120'
steps:
- task: Maven@4
displayName: maven install
@@ -250,7 +250,7 @@ stages:
displayName: Top 100 long-running testcases
- job: UT_FT_4
displayName: UT spark-datasource Java Test 2
timeoutInMinutes: '90'
timeoutInMinutes: '120'
steps:
- task: Maven@4
displayName: maven install
@@ -285,7 +285,7 @@ stages:
displayName: Top 100 long-running testcases
- job: UT_FT_5
displayName: UT spark-datasource DML
timeoutInMinutes: '90'
timeoutInMinutes: '120'
steps:
- task: Maven@4
displayName: maven install
@@ -320,7 +320,7 @@ stages:
displayName: Top 100 long-running testcases
- job: UT_FT_6
displayName: UT spark-datasource DDL & Others
timeoutInMinutes: '90'
timeoutInMinutes: '120'
steps:
- task: Maven@4
displayName: maven install
@@ -355,7 +355,7 @@ stages:
displayName: Top 100 long-running testcases
- job: UT_FT_7
displayName: UT Hudi Streamer & FT utilities
timeoutInMinutes: '90'
timeoutInMinutes: '120'
steps:
- task: Docker@2
displayName: "login to docker hub"
@@ -378,6 +378,7 @@ stages:
command: 'run'
arguments: >
-v $(Build.SourcesDirectory):/hudi
-v /var/run/docker.sock:/var/run/docker.sock
Contributor Author: This is required for running Docker in Docker.
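For context, not part of the diff itself: the utilities jobs run the build inside the hudi-ci-bundle-validation-base container, and the tests use Testcontainers (see the new test dependencies below), which needs to reach a Docker daemon. Mounting /var/run/docker.sock points Testcontainers at the host daemon so it can start sibling containers rather than requiring a nested daemon. A minimal sketch of the test-side effect, assuming the Testcontainers Kafka module; the image tag is an illustrative assumption:

```java
// Sketch only: inside the CI container, Testcontainers resolves the daemon behind the
// mounted /var/run/docker.sock and starts Kafka as a sibling container on the host.
import org.testcontainers.containers.KafkaContainer;
import org.testcontainers.utility.DockerImageName;

public class DockerInDockerSketch {
  public static void main(String[] args) {
    // Image tag is an assumption for illustration, not the one used by the Hudi CI.
    try (KafkaContainer kafka =
             new KafkaContainer(DockerImageName.parse("confluentinc/cp-kafka:7.4.0"))) {
      kafka.start(); // talks to the host daemon via the mounted socket
      System.out.println("Bootstrap servers: " + kafka.getBootstrapServers());
    }
  }
}
```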
-i docker.io/apachehudi/hudi-ci-bundle-validation-base:$(Build.BuildId)
/bin/bash -c "mvn clean install $(MVN_OPTS_INSTALL) -Phudi-platform-service -Pthrift-gen-source -pl hudi-utilities -am
&& mvn test $(MVN_OPTS_TEST) -Punit-tests $(JACOCO_AGENT_DESTFILE1_ARG) -Dtest="TestHoodie*" -Dsurefire.failIfNoSpecifiedTests=false -pl hudi-utilities
@@ -404,7 +405,7 @@ stages:
displayName: Top 100 long-running testcases
- job: UT_FT_8
displayName: UT FT Spark and SQL (additional)
timeoutInMinutes: '110'
timeoutInMinutes: '120'
steps:
- task: Maven@4
displayName: maven install
@@ -457,7 +458,7 @@ stages:
displayName: Top 100 long-running testcases
- job: UT_FT_9
displayName: FT spark 2
timeoutInMinutes: '90'
timeoutInMinutes: '120'
steps:
- task: Maven@4
displayName: maven install
@@ -492,7 +493,7 @@ stages:
displayName: Top 100 long-running testcases
- job: UT_FT_10
displayName: UT FT common & other modules
timeoutInMinutes: '180'
timeoutInMinutes: '120'
steps:
- task: Docker@2
displayName: "login to docker hub"
@@ -515,6 +516,7 @@ stages:
command: 'run'
arguments: >
-v $(Build.SourcesDirectory):/hudi
-v /var/run/docker.sock:/var/run/docker.sock
-i docker.io/apachehudi/hudi-ci-bundle-validation-base:$(Build.BuildId)
/bin/bash -c "mvn clean install $(MVN_OPTS_INSTALL) -Phudi-platform-service -Pthrift-gen-source
&& mvn test $(MVN_OPTS_TEST) -Punit-tests -Dsurefire.failIfNoSpecifiedTests=false $(JACOCO_AGENT_DESTFILE1_ARG) -pl $(JOB10_UT_MODULES)
5 changes: 5 additions & 0 deletions hudi-aws/pom.xml
@@ -251,6 +251,11 @@
<artifactId>kryo-shaded</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>localstack</artifactId>
<scope>test</scope>
</dependency>
</dependencies>

<build>
@@ -53,7 +53,7 @@ public class TestTransactionManager extends HoodieCommonTestHarness {
TransactionManager transactionManager;

@BeforeEach
private void init(TestInfo testInfo) throws IOException {
void init(TestInfo testInfo) throws IOException {
initPath();
initMetaClient();
this.writeConfig = getWriteConfig(testInfo.getTags().contains("useLockProviderWithRuntimeError"));
5 changes: 0 additions & 5 deletions hudi-tests-common/pom.xml
@@ -244,10 +244,5 @@
<artifactId>testcontainers</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.testcontainers</groupId>
Contributor Author: Moved this to where it is required (a usage sketch follows this file's diff).

<artifactId>localstack</artifactId>
<scope>compile</scope>
</dependency>
</dependencies>
</project>
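For context on the relocated org.testcontainers:localstack test dependency: a minimal sketch of how a hudi-aws test might use it, assuming LocalStack is started via Testcontainers and an AWS SDK v2 client is pointed at the emulated endpoint. The image tag, bucket name, and client wiring are illustrative assumptions, not code from this PR.

```java
// Sketch only: LocalStack via Testcontainers, with an S3 client wired to the emulated endpoint.
import org.testcontainers.containers.localstack.LocalStackContainer;
import org.testcontainers.utility.DockerImageName;

import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.s3.S3Client;

public class LocalStackSketch {
  public static void main(String[] args) {
    try (LocalStackContainer localstack =
             new LocalStackContainer(DockerImageName.parse("localstack/localstack:3.0"))
                 .withServices(LocalStackContainer.Service.S3)) {
      localstack.start();
      S3Client s3 = S3Client.builder()
          .endpointOverride(localstack.getEndpointOverride(LocalStackContainer.Service.S3))
          .credentialsProvider(StaticCredentialsProvider.create(
              AwsBasicCredentials.create(localstack.getAccessKey(), localstack.getSecretKey())))
          .region(Region.of(localstack.getRegion()))
          .build();
      s3.createBucket(b -> b.bucket("hudi-test-bucket")); // hypothetical bucket name
    }
  }
}
```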
20 changes: 12 additions & 8 deletions hudi-utilities/pom.xml
@@ -312,14 +312,6 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-10_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<classifier>tests</classifier>
<type>test-jar</type>
<scope>test</scope>
</dependency>

<!-- Dropwizard Metrics -->
<dependency>
@@ -516,6 +508,18 @@
<scope>test</scope>
</dependency>

<!-- Testcontainers -->
<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>testcontainers</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>kafka</artifactId>
<scope>test</scope>
</dependency>

<!-- AWS Services -->
<!-- https://mvnrepository.com/artifact/software.amazon.awssdk/aws-java-sdk-sqs -->
<dependency>
@@ -459,12 +459,19 @@ private static String resetTarget(Config configuration, String database, String
*/
public void sync() {
for (TableExecutionContext context : tableExecutionContexts) {
HoodieStreamer streamer = null;
try {
new HoodieStreamer(context.getConfig(), jssc, Option.ofNullable(context.getProperties())).sync();
streamer = new HoodieStreamer(context.getConfig(), jssc, Option.ofNullable(context.getProperties()));
streamer.sync();
successTables.add(Helpers.getTableWithDatabase(context));
streamer.shutdownGracefully();
Contributor Author: Ensure the instances are shut down properly.
Contributor: Minor: should we move this to a finally block?
Contributor Author: Yes, makes sense.

} catch (Exception e) {
LOG.error("error while running MultiTableDeltaStreamer for table: " + context.getTableName(), e);
failedTables.add(Helpers.getTableWithDatabase(context));
} finally {
if (streamer != null) {
streamer.shutdownGracefully();
}
}
}

@@ -27,9 +27,9 @@
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.utilities.callback.kafka.HoodieWriteCommitKafkaCallback;
import org.apache.hudi.utilities.callback.kafka.HoodieWriteCommitKafkaCallbackConfig;
import org.apache.hudi.utilities.testutils.KafkaTestUtils;
import org.apache.hudi.utilities.testutils.UtilitiesTestBase;

import org.apache.spark.streaming.kafka010.KafkaTestUtils;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
@@ -54,6 +54,7 @@
import org.apache.hudi.utilities.sources.TestDataSource;
import org.apache.hudi.utilities.sources.TestParquetDFSSourceEmptyBatch;
import org.apache.hudi.utilities.streamer.HoodieStreamer;
import org.apache.hudi.utilities.testutils.KafkaTestUtils;
import org.apache.hudi.utilities.testutils.UtilitiesTestBase;

import org.apache.avro.Schema;
@@ -62,7 +63,6 @@
import org.apache.kafka.common.serialization.ByteArrayDeserializer;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.streaming.kafka010.KafkaTestUtils;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
@@ -140,15 +140,12 @@ public class HoodieDeltaStreamerTestBase extends UtilitiesTestBase {
protected static String topicName;
protected static String defaultSchemaProviderClassName = FilebasedSchemaProvider.class.getName();
protected static int testNum = 1;

Map<String, String> hudiOpts = new HashMap<>();
public KafkaTestUtils testUtils;
protected static KafkaTestUtils testUtils;
Contributor Author: Tests now set up Kafka only once to cut down on the overhead.
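The new org.apache.hudi.utilities.testutils.KafkaTestUtils itself is not shown in this diff. Below is a minimal sketch of what a Testcontainers-backed replacement could look like; the method names (setup, brokerAddress, deleteTopics, teardown) mirror the call sites in this class, while the body is an assumption based on the new org.testcontainers:kafka test dependency.

```java
// Sketch only: a shared, Testcontainers-backed Kafka helper matching the call sites above.
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.AdminClientConfig;
import org.testcontainers.containers.KafkaContainer;
import org.testcontainers.utility.DockerImageName;

import java.util.Properties;

public class KafkaTestUtilsSketch {
  private KafkaContainer kafka;

  public KafkaTestUtilsSketch setup() {
    // Started once per test class and reused by every test method.
    kafka = new KafkaContainer(DockerImageName.parse("confluentinc/cp-kafka:7.4.0"));
    kafka.start();
    return this; // allows new KafkaTestUtils().setup() as in the @BeforeAll hook below
  }

  public String brokerAddress() {
    return kafka.getBootstrapServers();
  }

  public void deleteTopics() throws Exception {
    // Per-test cleanup that avoids restarting the broker between tests.
    Properties props = new Properties();
    props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, brokerAddress());
    try (AdminClient admin = AdminClient.create(props)) {
      admin.deleteTopics(admin.listTopics().names().get()).all().get();
    }
  }

  public void teardown() {
    if (kafka != null) {
      kafka.stop();
    }
  }
}
```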

protected Map<String, String> hudiOpts = new HashMap<>();

@BeforeEach
protected void prepareTestSetup() throws IOException {
setupTest();
testUtils = new KafkaTestUtils();
testUtils.setup();
topicName = "topic" + testNum;
prepareInitialConfigs(storage, basePath, testUtils.brokerAddress());
// reset TestDataSource recordInstantTime which may be set by any other test
@@ -157,13 +154,10 @@ protected void prepareTestSetup() throws IOException {

@AfterEach
public void cleanupKafkaTestUtils() {
if (testUtils != null) {
testUtils.teardown();
testUtils = null;
}
if (hudiOpts != null) {
hudiOpts = null;
}
testUtils.deleteTopics();
}

@BeforeAll
@@ -173,10 +167,14 @@ public static void initClass() throws Exception {
PARQUET_SOURCE_ROOT = basePath + "parquetFiles";
ORC_SOURCE_ROOT = basePath + "orcFiles";
JSON_KAFKA_SOURCE_ROOT = basePath + "jsonKafkaFiles";
testUtils = new KafkaTestUtils().setup();
}

@AfterAll
public static void tearDown() {
if (testUtils != null) {
testUtils.teardown();
}
UtilitiesTestBase.cleanUpUtilitiesTestServices();
}

@@ -704,31 +702,31 @@ static HoodieDeltaStreamer.Config makeConfigForHudiIncrSrc(String srcBasePath, S
static void assertAtleastNCompactionCommits(int minExpected, String tablePath) {
HoodieTableMetaClient meta = createMetaClient(storage, tablePath);
HoodieTimeline timeline = meta.getActiveTimeline().getCommitAndReplaceTimeline().filterCompletedInstants();
LOG.info("Timeline Instants=" + meta.getActiveTimeline().getInstants());
LOG.info("Timeline Instants={}", meta.getActiveTimeline().getInstants());
Contributor Author: Fixed logging to avoid the toString calls while I was updating this file.
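For reference, not part of the diff: what the {} placeholders change, assuming LOG is an SLF4J logger as the placeholder syntax suggests. With parameterized logging, the argument's toString() and the message formatting only run when INFO is actually enabled.

```java
// Sketch only: eager concatenation vs. deferred SLF4J formatting.
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.List;

public class LoggingSketch {
  private static final Logger LOG = LoggerFactory.getLogger(LoggingSketch.class);

  static void log(List<String> instants) {
    LOG.info("Timeline Instants=" + instants);   // builds the full String even if INFO is disabled
    LOG.info("Timeline Instants={}", instants);  // formatting and toString() deferred until needed
  }
}
```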

int numCompactionCommits = timeline.countInstants();
assertTrue(minExpected <= numCompactionCommits, "Got=" + numCompactionCommits + ", exp >=" + minExpected);
}

static void assertAtleastNDeltaCommits(int minExpected, String tablePath) {
HoodieTableMetaClient meta = createMetaClient(storage.getConf(), tablePath);
HoodieTimeline timeline = meta.getActiveTimeline().getDeltaCommitTimeline().filterCompletedInstants();
LOG.info("Timeline Instants=" + meta.getActiveTimeline().getInstants());
LOG.info("Timeline Instants={}", meta.getActiveTimeline().getInstants());
int numDeltaCommits = timeline.countInstants();
assertTrue(minExpected <= numDeltaCommits, "Got=" + numDeltaCommits + ", exp >=" + minExpected);
}

static void assertAtleastNCompactionCommitsAfterCommit(int minExpected, String lastSuccessfulCommit, String tablePath) {
HoodieTableMetaClient meta = createMetaClient(storage.getConf(), tablePath);
HoodieTimeline timeline = meta.getActiveTimeline().getCommitAndReplaceTimeline().findInstantsAfter(lastSuccessfulCommit).filterCompletedInstants();
LOG.info("Timeline Instants=" + meta.getActiveTimeline().getInstants());
LOG.info("Timeline Instants={}", meta.getActiveTimeline().getInstants());
int numCompactionCommits = timeline.countInstants();
assertTrue(minExpected <= numCompactionCommits, "Got=" + numCompactionCommits + ", exp >=" + minExpected);
}

static void assertAtleastNDeltaCommitsAfterCommit(int minExpected, String lastSuccessfulCommit, String tablePath) {
HoodieTableMetaClient meta = createMetaClient(storage.getConf(), tablePath);
HoodieTimeline timeline = meta.reloadActiveTimeline().getDeltaCommitTimeline().findInstantsAfter(lastSuccessfulCommit).filterCompletedInstants();
LOG.info("Timeline Instants=" + meta.getActiveTimeline().getInstants());
LOG.info("Timeline Instants={}", meta.getActiveTimeline().getInstants());
int numDeltaCommits = timeline.countInstants();
assertTrue(minExpected <= numDeltaCommits, "Got=" + numDeltaCommits + ", exp >=" + minExpected);
}
@@ -783,55 +781,55 @@ static void waitFor(BooleanSupplier booleanSupplier) {
static void assertAtLeastNCommits(int minExpected, String tablePath) {
HoodieTableMetaClient meta = createMetaClient(storage.getConf(), tablePath);
HoodieTimeline timeline = meta.getActiveTimeline().filterCompletedInstants();
LOG.info("Timeline Instants=" + meta.getActiveTimeline().getInstants());
LOG.info("Timeline Instants={}", meta.getActiveTimeline().getInstants());
int numDeltaCommits = timeline.countInstants();
assertTrue(minExpected <= numDeltaCommits, "Got=" + numDeltaCommits + ", exp >=" + minExpected);
}

static void assertAtLeastNReplaceCommits(int minExpected, String tablePath) {
HoodieTableMetaClient meta = createMetaClient(storage.getConf(), tablePath);
HoodieTimeline timeline = meta.getActiveTimeline().getCompletedReplaceTimeline();
LOG.info("Timeline Instants=" + meta.getActiveTimeline().getInstants());
LOG.info("Timeline Instants={}", meta.getActiveTimeline().getInstants());
int numDeltaCommits = timeline.countInstants();
assertTrue(minExpected <= numDeltaCommits, "Got=" + numDeltaCommits + ", exp >=" + minExpected);
}

static void assertPendingIndexCommit(String tablePath) {
HoodieTableMetaClient meta = createMetaClient(storage.getConf(), tablePath);
HoodieTimeline timeline = meta.reloadActiveTimeline().getAllCommitsTimeline().filterPendingIndexTimeline();
LOG.info("Timeline Instants=" + meta.getActiveTimeline().getInstants());
LOG.info("Timeline Instants={}", meta.getActiveTimeline().getInstants());
int numIndexCommits = timeline.countInstants();
assertEquals(1, numIndexCommits, "Got=" + numIndexCommits + ", exp=1");
}

static void assertCompletedIndexCommit(String tablePath) {
HoodieTableMetaClient meta = createMetaClient(storage.getConf(), tablePath);
HoodieTimeline timeline = meta.reloadActiveTimeline().getAllCommitsTimeline().filterCompletedIndexTimeline();
LOG.info("Timeline Instants=" + meta.getActiveTimeline().getInstants());
LOG.info("Timeline Instants={}", meta.getActiveTimeline().getInstants());
int numIndexCommits = timeline.countInstants();
assertEquals(1, numIndexCommits, "Got=" + numIndexCommits + ", exp=1");
}

static void assertNoReplaceCommits(String tablePath) {
HoodieTableMetaClient meta = createMetaClient(storage.getConf(), tablePath);
HoodieTimeline timeline = meta.getActiveTimeline().getCompletedReplaceTimeline();
LOG.info("Timeline Instants=" + meta.getActiveTimeline().getInstants());
LOG.info("Timeline Instants={}", meta.getActiveTimeline().getInstants());
int numDeltaCommits = timeline.countInstants();
assertEquals(0, numDeltaCommits, "Got=" + numDeltaCommits + ", exp =" + 0);
}

static void assertAtLeastNClusterRequests(int minExpected, String tablePath) {
HoodieTableMetaClient meta = createMetaClient(storage.getConf(), tablePath);
HoodieTimeline timeline = meta.getActiveTimeline().filterPendingClusteringTimeline();
LOG.info("Timeline Instants=" + meta.getActiveTimeline().getInstants());
LOG.info("Timeline Instants={}", meta.getActiveTimeline().getInstants());
int numDeltaCommits = timeline.countInstants();
assertTrue(minExpected <= numDeltaCommits, "Got=" + numDeltaCommits + ", exp >=" + minExpected);
}

static void assertAtLeastNCommitsAfterRollback(int minExpectedRollback, int minExpectedCommits, String tablePath) {
HoodieTableMetaClient meta = createMetaClient(storage.getConf(), tablePath);
HoodieTimeline timeline = meta.getActiveTimeline().getRollbackTimeline().filterCompletedInstants();
LOG.info("Rollback Timeline Instants=" + meta.getActiveTimeline().getInstants());
LOG.info("Rollback Timeline Instants={}", meta.getActiveTimeline().getInstants());
int numRollbackCommits = timeline.countInstants();
assertTrue(minExpectedRollback <= numRollbackCommits, "Got=" + numRollbackCommits + ", exp >=" + minExpectedRollback);
HoodieInstant firstRollback = timeline.getInstants().get(0);
@@ -842,4 +840,22 @@ static void assertAtLeastNCommitsAfterRollback(int minExpectedRollback, int minE
assertTrue(minExpectedCommits <= numCommits, "Got=" + numCommits + ", exp >=" + minExpectedCommits);
}
}

protected void syncOnce(HoodieDeltaStreamer.Config cfg) throws Exception {
HoodieStreamer streamer = new HoodieDeltaStreamer(cfg, jsc);
streamer.sync();
streamer.shutdownGracefully();
}

protected void syncOnce(HoodieStreamer streamer) throws Exception {
try {
streamer.sync();
} finally {
streamer.shutdownGracefully();
}
}

protected void syncOnce(HoodieStreamer.Config cfg, Option<TypedProperties> properties) throws Exception {
syncOnce(new HoodieStreamer(cfg, jsc, properties));
}
}