From e52491e88d8e02bf563bdcefdfed5f83d1213f24 Mon Sep 17 00:00:00 2001 From: lmossman Date: Wed, 1 Jun 2022 17:57:17 -0700 Subject: [PATCH 01/14] sweep all scheduler application code and new-scheduler conditional logic --- .../airbyte/bootloader/BootloaderAppTest.java | 3 - .../features/EnvVariableFeatureFlags.java | 9 - .../commons/features/FeatureFlags.java | 2 - airbyte-scheduler/app/Dockerfile | 15 - airbyte-scheduler/app/build.gradle | 70 ---- airbyte-scheduler/app/readme.md | 7 - .../io/airbyte/scheduler/app/JobCleaner.java | 148 -------- .../io/airbyte/scheduler/app/JobLogs.java | 17 - .../io/airbyte/scheduler/app/JobRetrier.java | 114 ------ .../airbyte/scheduler/app/JobScheduler.java | 115 ------ .../airbyte/scheduler/app/JobSubmitter.java | 186 ---------- .../scheduler/app/ScheduleJobPredicate.java | 68 ---- .../airbyte/scheduler/app/SchedulerApp.java | 308 ---------------- .../airbyte/scheduler/app/JobCleanerTest.java | 143 -------- .../io/airbyte/scheduler/app/JobLogsTest.java | 20 -- .../airbyte/scheduler/app/JobRetrierTest.java | 101 ------ .../scheduler/app/JobSchedulerTest.java | 165 --------- .../scheduler/app/JobSubmitterTest.java | 332 ------------------ .../app/ScheduleJobPredicateTest.java | 92 ----- .../client/DefaultSchedulerJobClient.java | 91 ----- .../airbyte/scheduler/client/EventRunner.java | 2 +- .../scheduler/client/TemporalEventRunner.java | 2 +- .../client/DefaultSchedulerJobClientTest.java | 113 ------ .../server/ConfigurationApiFactory.java | 19 - .../java/io/airbyte/server/ServerApp.java | 18 - .../java/io/airbyte/server/ServerFactory.java | 19 +- .../airbyte/server/apis/ConfigurationApi.java | 25 +- .../server/handlers/ConnectionsHandler.java | 71 +--- .../server/handlers/SchedulerHandler.java | 155 +------- .../WebBackendConnectionsHandler.java | 25 +- .../server/apis/ConfigurationApiTest.java | 7 - .../handlers/ConnectionsHandlerTest.java | 64 +--- .../server/handlers/SchedulerHandlerTest.java | 194 +++------- .../WebBackendConnectionsHandlerTest.java | 38 -- .../test/acceptance/AcceptanceTests.java | 240 ++++++------- docker-compose.yaml | 2 - .../dev-integration-test-schedulerv2/.env | 68 ---- .../dev-integration-test-schedulerv2/.secrets | 7 - .../bootloader-patch.yaml | 13 - .../kustomization.yaml | 41 --- .../scheduler-patch.yaml | 15 - .../server-patch.yaml | 15 - .../sync-only-worker.yaml | 230 ------------ .../temporal-ui.yaml | 37 -- .../worker-patch.yaml | 17 - 45 files changed, 192 insertions(+), 3251 deletions(-) delete mode 100644 airbyte-scheduler/app/Dockerfile delete mode 100644 airbyte-scheduler/app/build.gradle delete mode 100644 airbyte-scheduler/app/readme.md delete mode 100644 airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/JobCleaner.java delete mode 100644 airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/JobLogs.java delete mode 100644 airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/JobRetrier.java delete mode 100644 airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/JobScheduler.java delete mode 100644 airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/JobSubmitter.java delete mode 100644 airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/ScheduleJobPredicate.java delete mode 100644 airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java delete mode 100644 airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/JobCleanerTest.java delete mode 100644 airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/JobLogsTest.java delete 
mode 100644 airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/JobRetrierTest.java delete mode 100644 airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/JobSchedulerTest.java delete mode 100644 airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/JobSubmitterTest.java delete mode 100644 airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/ScheduleJobPredicateTest.java delete mode 100644 airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/DefaultSchedulerJobClient.java delete mode 100644 airbyte-scheduler/client/src/test/java/io/airbyte/scheduler/client/DefaultSchedulerJobClientTest.java delete mode 100644 kube/overlays/dev-integration-test-schedulerv2/.env delete mode 100644 kube/overlays/dev-integration-test-schedulerv2/.secrets delete mode 100644 kube/overlays/dev-integration-test-schedulerv2/bootloader-patch.yaml delete mode 100644 kube/overlays/dev-integration-test-schedulerv2/kustomization.yaml delete mode 100644 kube/overlays/dev-integration-test-schedulerv2/scheduler-patch.yaml delete mode 100644 kube/overlays/dev-integration-test-schedulerv2/server-patch.yaml delete mode 100644 kube/overlays/dev-integration-test-schedulerv2/sync-only-worker.yaml delete mode 100644 kube/overlays/dev-integration-test-schedulerv2/temporal-ui.yaml delete mode 100644 kube/overlays/dev-integration-test-schedulerv2/worker-patch.yaml diff --git a/airbyte-bootloader/src/test/java/io/airbyte/bootloader/BootloaderAppTest.java b/airbyte-bootloader/src/test/java/io/airbyte/bootloader/BootloaderAppTest.java index 8c2e360f4d9a..e94fe2dffbb5 100644 --- a/airbyte-bootloader/src/test/java/io/airbyte/bootloader/BootloaderAppTest.java +++ b/airbyte-bootloader/src/test/java/io/airbyte/bootloader/BootloaderAppTest.java @@ -100,7 +100,6 @@ void testBootloaderAppBlankDb() throws Exception { when(mockedConfigs.getJobsDatabaseInitializationTimeoutMs()).thenReturn(60000L); val mockedFeatureFlags = mock(FeatureFlags.class); - when(mockedFeatureFlags.usesNewScheduler()).thenReturn(false); val mockedSecretMigrator = mock(SecretMigrator.class); @@ -157,7 +156,6 @@ void testBootloaderAppRunSecretMigration() throws Exception { when(mockedConfigs.getJobsDatabaseInitializationTimeoutMs()).thenReturn(60000L); val mockedFeatureFlags = mock(FeatureFlags.class); - when(mockedFeatureFlags.usesNewScheduler()).thenReturn(false); final JsonSecretsProcessor jsonSecretsProcessor = JsonSecretsProcessor.builder() .copySecrets(true) @@ -302,7 +300,6 @@ void testPostLoadExecutionExecutes() throws Exception { when(mockedConfigs.getJobsDatabaseInitializationTimeoutMs()).thenReturn(60000L); val mockedFeatureFlags = mock(FeatureFlags.class); - when(mockedFeatureFlags.usesNewScheduler()).thenReturn(false); val mockedSecretMigrator = mock(SecretMigrator.class); diff --git a/airbyte-commons/src/main/java/io/airbyte/commons/features/EnvVariableFeatureFlags.java b/airbyte-commons/src/main/java/io/airbyte/commons/features/EnvVariableFeatureFlags.java index 21678000c910..c784a7e3103b 100644 --- a/airbyte-commons/src/main/java/io/airbyte/commons/features/EnvVariableFeatureFlags.java +++ b/airbyte-commons/src/main/java/io/airbyte/commons/features/EnvVariableFeatureFlags.java @@ -9,15 +9,6 @@ @Slf4j public class EnvVariableFeatureFlags implements FeatureFlags { - @Override - public boolean usesNewScheduler() { - // TODO: sweep this method along with the scheduler - log.info("New Scheduler: true (post-migration)"); - - // After migrating all OSS users onto the new temporal scheduler, this should always return 
true. - return true; - } - @Override public boolean autoDisablesFailingConnections() { log.info("Auto Disable Failing Connections: " + Boolean.parseBoolean(System.getenv("AUTO_DISABLE_FAILING_CONNECTIONS"))); diff --git a/airbyte-commons/src/main/java/io/airbyte/commons/features/FeatureFlags.java b/airbyte-commons/src/main/java/io/airbyte/commons/features/FeatureFlags.java index 2b84bdc05170..5833028056f3 100644 --- a/airbyte-commons/src/main/java/io/airbyte/commons/features/FeatureFlags.java +++ b/airbyte-commons/src/main/java/io/airbyte/commons/features/FeatureFlags.java @@ -10,8 +10,6 @@ */ public interface FeatureFlags { - boolean usesNewScheduler(); - boolean autoDisablesFailingConnections(); boolean exposeSecretsInExport(); diff --git a/airbyte-scheduler/app/Dockerfile b/airbyte-scheduler/app/Dockerfile deleted file mode 100644 index a4faf00e56ca..000000000000 --- a/airbyte-scheduler/app/Dockerfile +++ /dev/null @@ -1,15 +0,0 @@ -ARG JDK_VERSION=17.0.1 -ARG JDK_IMAGE=openjdk:${JDK_VERSION}-slim -FROM ${JDK_IMAGE} AS scheduler - -ARG VERSION=0.39.7-alpha - -ENV APPLICATION airbyte-scheduler -ENV VERSION ${VERSION} - -WORKDIR /app - -ADD bin/${APPLICATION}-${VERSION}.tar /app - -# wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-${VERSION}/bin/${APPLICATION}"] diff --git a/airbyte-scheduler/app/build.gradle b/airbyte-scheduler/app/build.gradle deleted file mode 100644 index b5f110468699..000000000000 --- a/airbyte-scheduler/app/build.gradle +++ /dev/null @@ -1,70 +0,0 @@ -plugins { - id 'application' -} - -dependencies { - implementation 'io.fabric8:kubernetes-client:5.12.2' - implementation 'io.temporal:temporal-sdk:1.8.1' - - implementation project(':airbyte-analytics') - implementation project(':airbyte-api') - implementation project(':airbyte-config:models') - implementation project(':airbyte-config:persistence') - implementation project(':airbyte-db:lib') - implementation project(':airbyte-json-validation') - implementation project(':airbyte-metrics:lib') - implementation project(':airbyte-protocol:models') - implementation project(':airbyte-scheduler:models') - implementation project(':airbyte-scheduler:persistence') - implementation project(':airbyte-workers') - - testImplementation libs.testcontainers.postgresql -} - -application { - applicationName = "airbyte-scheduler" - mainClass = 'io.airbyte.scheduler.app.SchedulerApp' - applicationDefaultJvmArgs = ['-XX:+ExitOnOutOfMemoryError', '-XX:MaxRAMPercentage=75.0'] -} - -Properties env = new Properties() -rootProject.file('.env.dev').withInputStream { env.load(it) } - -run { - // default for running on local machine. - environment "DATABASE_USER", env.DATABASE_USER - environment "DATABASE_PASSWORD", env.DATABASE_PASSWORD - - environment "CONFIG_DATABASE_USER", env.CONFIG_DATABASE_USER - environment "CONFIG_DATABASE_PASSWORD", env.CONFIG_DATABASE_PASSWORD - - // we map the docker pg db to port 5433 so it does not conflict with other pg instances. 
- environment "DATABASE_URL", "jdbc:postgresql://localhost:5433/${env.DATABASE_DB}" - environment "CONFIG_DATABASE_URL", "jdbc:postgresql://localhost:5433/${env.CONFIG_DATABASE_DB}" - - environment "RUN_DATABASE_MIGRATION_ON_STARTUP", env.RUN_DATABASE_MIGRATION_ON_STARTUP - - environment "WORKSPACE_ROOT", env.WORKSPACE_ROOT - environment "WORKSPACE_DOCKER_MOUNT", env.WORKSPACE_DOCKER_MOUNT - environment "LOCAL_DOCKER_MOUNT", env.LOCAL_DOCKER_MOUNT - environment "CONFIG_ROOT", "/tmp/airbyte_config" - environment "TRACKING_STRATEGY", env.TRACKING_STRATEGY - environment "AIRBYTE_VERSION", env.VERSION - environment "AIRBYTE_ROLE", System.getenv('AIRBYTE_ROLE') - environment "TEMPORAL_HOST", "localhost:7233" - -} - -task copyGeneratedTar(type: Copy) { - dependsOn copyDocker - dependsOn distTar - - from('build/distributions') { - include 'airbyte-scheduler-*.tar' - } - into 'build/docker/bin' -} - -Task dockerBuildTask = getDockerBuildTask("scheduler", "$project.projectDir", "$rootProject.ext.version", "$rootProject.ext.image_tag") -dockerBuildTask.dependsOn(copyGeneratedTar) -assemble.dependsOn(dockerBuildTask) diff --git a/airbyte-scheduler/app/readme.md b/airbyte-scheduler/app/readme.md deleted file mode 100644 index b1709a58c823..000000000000 --- a/airbyte-scheduler/app/readme.md +++ /dev/null @@ -1,7 +0,0 @@ -# airbyte-scheduler:app - -This module contains the Scheduler App. The main method can be found in `SchedulerApp.java`. The Scheduler is responsible for: -1. Determining if it is time to schedule a Sync Job for a Connection. -2. Submitting pending Jobs to the Workers. -3. Retrying failing Jobs. -4. Clearing out old Job History (so it does not become a space concern). diff --git a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/JobCleaner.java b/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/JobCleaner.java deleted file mode 100644 index 01d3227097d6..000000000000 --- a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/JobCleaner.java +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (c) 2022 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.scheduler.app; - -import com.google.common.collect.Sets; -import io.airbyte.config.WorkspaceRetentionConfig; -import io.airbyte.scheduler.models.Job; -import io.airbyte.scheduler.models.JobStatus; -import io.airbyte.scheduler.persistence.JobPersistence; -import java.io.File; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.attribute.BasicFileAttributes; -import java.nio.file.attribute.FileTime; -import java.time.LocalDateTime; -import java.time.OffsetDateTime; -import java.util.Date; -import java.util.HashSet; -import java.util.Set; -import java.util.concurrent.atomic.AtomicLong; -import java.util.stream.Collectors; -import org.apache.commons.io.FileUtils; -import org.apache.commons.io.filefilter.AgeFileFilter; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * The job cleaner is responsible for limiting the retention of files in the workspace root. It does - * this in two ways. 1. It cleans out all files and directories that are older than the maximum - * retention date. 2. It cleans out the oldest files before the minimum retention date until it is - * within the max workspace size. 
- */ -public class JobCleaner implements Runnable { - - private static final Logger LOGGER = LoggerFactory.getLogger(JobCleaner.class); - - private final Path workspaceRoot; - private final JobPersistence jobPersistence; - - private final WorkspaceRetentionConfig config; - - public JobCleaner(final WorkspaceRetentionConfig config, - final Path workspaceRoot, - final JobPersistence jobPersistence) { - this.config = config; - this.workspaceRoot = workspaceRoot; - this.jobPersistence = jobPersistence; - } - - @Override - public void run() { - try { - deleteOldFiles(); - deleteOnSize(); - } catch (final IOException e) { - throw new RuntimeException(e); - } - } - - private void deleteOldFiles() throws IOException { - final Date oldestAllowed = getDateFromDaysAgo(config.getMaxDays()); - - Files.walk(workspaceRoot) - .map(Path::toFile) - .filter(f -> new AgeFileFilter(oldestAllowed).accept(f)) - .forEach(file -> { - LOGGER.info("Deleting old file: " + file.toString()); - FileUtils.deleteQuietly(file); - - final File parentDir = file.getParentFile(); - if (parentDir.isDirectory() && parentDir.listFiles().length == 0) { - FileUtils.deleteQuietly(parentDir); - } - }); - } - - private void deleteOnSize() throws IOException { - final Set nonTerminalJobIds = new HashSet<>(); - final Sets.SetView nonTerminalStatuses = Sets.difference(Set.of(JobStatus.values()), JobStatus.TERMINAL_STATUSES); - - for (final JobStatus nonTerminalStatus : nonTerminalStatuses) { - final Set jobIds = jobPersistence.listJobsWithStatus(nonTerminalStatus) - .stream() - .map(Job::getId) - .map(String::valueOf) - .collect(Collectors.toSet()); - - nonTerminalJobIds.addAll(jobIds); - } - - final Date youngestAllowed = getDateFromDaysAgo(config.getMinDays()); - - final long workspaceBytes = FileUtils.sizeOfDirectory(workspaceRoot.toFile()); - final AtomicLong deletedBytes = new AtomicLong(0); - final AgeFileFilter ageFilter = new AgeFileFilter(youngestAllowed); - Files.walk(workspaceRoot) - .map(Path::toFile) - .filter(f -> { - Path relativePath = workspaceRoot.relativize(f.toPath()); - - // if the directory is ID/something instead of just ID, get just the ID - if (relativePath.getParent() != null) { - relativePath = workspaceRoot.relativize(f.toPath()).getParent(); - } - - if (!relativePath.toString().equals("")) { - return !nonTerminalJobIds.contains(relativePath.toString()); - } else { - return true; - } - }) - .filter(ageFilter::accept) - .sorted((o1, o2) -> { - final FileTime ft1 = getFileTime(o1); - final FileTime ft2 = getFileTime(o2); - return ft1.compareTo(ft2); - }) - .forEach(fileToDelete -> { - if (workspaceBytes - deletedBytes.get() > config.getMaxSizeMb() * 1024 * 1024) { - final long sizeToDelete = fileToDelete.length(); - deletedBytes.addAndGet(sizeToDelete); - LOGGER.info("Deleting: " + fileToDelete.toString()); - FileUtils.deleteQuietly(fileToDelete); - - final File parentDir = fileToDelete.getParentFile(); - if (parentDir.isDirectory() && parentDir.listFiles().length == 0) { - FileUtils.deleteQuietly(parentDir); - } - } - }); - } - - protected static Date getDateFromDaysAgo(final long daysAgo) { - return Date.from(LocalDateTime.now().minusDays(daysAgo).toInstant(OffsetDateTime.now().getOffset())); - } - - private static FileTime getFileTime(final File file) { - try { - return Files.readAttributes(file.toPath(), BasicFileAttributes.class).creationTime(); - } catch (final IOException e) { - throw new RuntimeException(e); - } - } - -} diff --git 
a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/JobLogs.java b/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/JobLogs.java deleted file mode 100644 index 1e1052d05064..000000000000 --- a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/JobLogs.java +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (c) 2022 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.scheduler.app; - -import java.nio.file.Paths; - -public class JobLogs { - - public static final String ROOT_PATH = "logs/jobs"; - - public static String getLogDirectory(final String scope) { - return Paths.get(ROOT_PATH, scope).toString(); - } - -} diff --git a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/JobRetrier.java b/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/JobRetrier.java deleted file mode 100644 index 3bccd2078275..000000000000 --- a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/JobRetrier.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2022 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.scheduler.app; - -import io.airbyte.scheduler.models.Job; -import io.airbyte.scheduler.models.JobStatus; -import io.airbyte.scheduler.persistence.JobNotifier; -import io.airbyte.scheduler.persistence.JobPersistence; -import java.io.IOException; -import java.time.Instant; -import java.util.List; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.function.Supplier; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class JobRetrier implements Runnable { - - private static final Logger LOGGER = LoggerFactory.getLogger(JobRetrier.class); - private static final int RETRY_WAIT_MINUTES = 1; - - private final JobPersistence persistence; - private final Supplier<Instant> timeSupplier; - private final JobNotifier jobNotifier; - private final int maxSyncJobAttempts; - - public JobRetrier(final JobPersistence jobPersistence, - final Supplier<Instant> timeSupplier, - final JobNotifier jobNotifier, - final int maxSyncJobAttempts) { - this.persistence = jobPersistence; - this.timeSupplier = timeSupplier; - this.jobNotifier = jobNotifier; - this.maxSyncJobAttempts = maxSyncJobAttempts; - } - - @Override - public void run() { - LOGGER.debug("Running Job Retrier..."); - - final AtomicInteger failedJobs = new AtomicInteger(); - final AtomicInteger retriedJobs = new AtomicInteger(); - final List<Job> incompleteJobs = incompleteJobs(); - - incompleteJobs.forEach(job -> { - if (hasReachedMaxAttempt(job)) { - failJob(job); - failedJobs.incrementAndGet(); - } else if (shouldRetry(job)) { - retriedJobs.incrementAndGet(); - resetJob(job); - } - }); - - LOGGER.debug("Completed Job Retrier..."); - - final int incompleteJobCount = incompleteJobs.size(); - final int failedJobCount = failedJobs.get(); - final int retriedJobCount = retriedJobs.get(); - if (incompleteJobCount > 0 || failedJobCount > 0 || retriedJobCount > 0) { - LOGGER.info("Job Retrier Summary. Incomplete jobs: {}, Jobs set to retry: {}, Jobs set to failed: {}", - incompleteJobs.size(), - retriedJobs.get(), - failedJobs.get()); - } - } - - private List<Job> incompleteJobs() { - try { - return persistence.listJobsWithStatus(JobStatus.INCOMPLETE); - } catch (final IOException e) { - throw new RuntimeException("failed to fetch failed jobs", e); - } - } - - private boolean hasReachedMaxAttempt(final Job job) { - if (Job.REPLICATION_TYPES.contains(job.getConfigType())) { - return job.getAttemptsCount() >= maxSyncJobAttempts; - } else { - return job.getAttemptsCount() >= 1; - } - } - - private boolean shouldRetry(final Job job) { - if (Job.REPLICATION_TYPES.contains(job.getConfigType())) { - final long lastRun = job.getUpdatedAtInSecond(); - // todo (cgardens) - use exponential backoff. - return lastRun < timeSupplier.get().getEpochSecond() - TimeUnit.MINUTES.toSeconds(RETRY_WAIT_MINUTES); - } else { - return false; - } - } - - private void failJob(final Job job) { - try { - jobNotifier.failJob("max retry limit was reached", job); - persistence.failJob(job.getId()); - } catch (final IOException e) { - throw new RuntimeException("failed to update status for job: " + job.getId(), e); - } - } - - private void resetJob(final Job job) { - try { - persistence.resetJob(job.getId()); - } catch (final IOException e) { - throw new RuntimeException("failed to update status for job: " + job.getId(), e); - } - } - -} diff --git a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/JobScheduler.java b/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/JobScheduler.java deleted file mode 100644 index e1c232b9f328..000000000000 --- a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/JobScheduler.java +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 2022 Airbyte, Inc., all rights reserved.
- */ - -package io.airbyte.scheduler.app; - -import com.google.common.annotations.VisibleForTesting; -import io.airbyte.analytics.TrackingClient; -import io.airbyte.config.StandardSync; -import io.airbyte.config.StandardSync.Status; -import io.airbyte.config.persistence.ConfigNotFoundException; -import io.airbyte.config.persistence.ConfigRepository; -import io.airbyte.scheduler.models.Job; -import io.airbyte.scheduler.persistence.DefaultJobCreator; -import io.airbyte.scheduler.persistence.JobPersistence; -import io.airbyte.scheduler.persistence.job_factory.DefaultSyncJobFactory; -import io.airbyte.scheduler.persistence.job_factory.OAuthConfigSupplier; -import io.airbyte.scheduler.persistence.job_factory.SyncJobFactory; -import io.airbyte.validation.json.JsonValidationException; -import io.airbyte.workers.WorkerConfigs; -import java.io.IOException; -import java.time.Instant; -import java.util.List; -import java.util.Optional; -import java.util.function.BiPredicate; -import java.util.stream.Collectors; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class JobScheduler implements Runnable { - - private static final Logger LOGGER = LoggerFactory.getLogger(JobScheduler.class); - - private final JobPersistence jobPersistence; - private final ConfigRepository configRepository; - private final BiPredicate<Optional<Job>, StandardSync> scheduleJobPredicate; - private final SyncJobFactory jobFactory; - - @VisibleForTesting - JobScheduler(final JobPersistence jobPersistence, - final ConfigRepository configRepository, - final BiPredicate<Optional<Job>, StandardSync> scheduleJobPredicate, - final SyncJobFactory jobFactory) { - this.jobPersistence = jobPersistence; - this.configRepository = configRepository; - this.scheduleJobPredicate = scheduleJobPredicate; - this.jobFactory = jobFactory; - } - - public JobScheduler(final boolean connectorSpecificResourceDefaultsEnabled, - final JobPersistence jobPersistence, - final ConfigRepository configRepository, - final TrackingClient trackingClient, - final WorkerConfigs workerConfigs) { - this( - jobPersistence, - configRepository, - new ScheduleJobPredicate(Instant::now), - new DefaultSyncJobFactory( - connectorSpecificResourceDefaultsEnabled, - new DefaultJobCreator(jobPersistence, configRepository, workerConfigs.getResourceRequirements()), - configRepository, - new OAuthConfigSupplier(configRepository, trackingClient))); - } - - @Override - public void run() { - try { - LOGGER.debug("Running job-scheduler..."); - - scheduleSyncJobs(); - - LOGGER.debug("Completed Job-Scheduler..."); - } catch (final Throwable e) { - LOGGER.error("Job Scheduler Error", e); - } - } - - private void scheduleSyncJobs() throws IOException { - int jobsScheduled = 0; - final var start = System.currentTimeMillis(); - final List<StandardSync> activeConnections = getAllActiveConnections(); - final var queryEnd = System.currentTimeMillis(); - LOGGER.debug("Total active connections: {}", activeConnections.size()); - LOGGER.debug("Time to retrieve all connections: {} ms", queryEnd - start); - - for (final StandardSync connection : activeConnections) { - final Optional<Job> previousJobOptional = jobPersistence.getLastReplicationJob(connection.getConnectionId()); - - if (scheduleJobPredicate.test(previousJobOptional, connection)) { - jobFactory.create(connection.getConnectionId()); - jobsScheduled++; - SchedulerApp.PENDING_JOBS.getAndIncrement(); - } - } - final var end = System.currentTimeMillis(); - LOGGER.debug("Time taken to schedule jobs: {} ms", end - start); - - if (jobsScheduled > 0) {
LOGGER.info("Job-Scheduler Summary. Active connections: {}, Jobs scheduled this cycle: {}", activeConnections.size(), jobsScheduled); - } - } - - private List getAllActiveConnections() { - try { - return configRepository.listStandardSyncs() - .stream() - .filter(sync -> sync.getStatus() == Status.ACTIVE) - .collect(Collectors.toList()); - } catch (final JsonValidationException | IOException | ConfigNotFoundException e) { - throw new RuntimeException(e.getMessage(), e); - } - } - -} diff --git a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/JobSubmitter.java b/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/JobSubmitter.java deleted file mode 100644 index cc8a5afd3e50..000000000000 --- a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/JobSubmitter.java +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Copyright (c) 2022 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.scheduler.app; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.Sets; -import io.airbyte.commons.concurrency.LifecycledCallable; -import io.airbyte.commons.enums.Enums; -import io.airbyte.config.Configs.WorkerEnvironment; -import io.airbyte.config.JobConfig.ConfigType; -import io.airbyte.config.StandardWorkspace; -import io.airbyte.config.helpers.LogClientSingleton; -import io.airbyte.config.helpers.LogConfigs; -import io.airbyte.config.persistence.ConfigRepository; -import io.airbyte.scheduler.models.Job; -import io.airbyte.scheduler.persistence.JobNotifier; -import io.airbyte.scheduler.persistence.JobPersistence; -import io.airbyte.scheduler.persistence.job_tracker.JobTracker; -import io.airbyte.scheduler.persistence.job_tracker.JobTracker.JobState; -import io.airbyte.workers.run.TemporalWorkerRunFactory; -import io.airbyte.workers.run.WorkerRun; -import java.nio.file.Path; -import java.util.Optional; -import java.util.Set; -import java.util.UUID; -import java.util.concurrent.ExecutorService; -import java.util.function.Consumer; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.slf4j.MDC; - -public class JobSubmitter implements Runnable { - - private static final Logger LOGGER = LoggerFactory.getLogger(JobSubmitter.class); - - private final ExecutorService threadPool; - private final JobPersistence persistence; - private final TemporalWorkerRunFactory temporalWorkerRunFactory; - private final JobTracker jobTracker; - private final JobNotifier jobNotifier; - private final WorkerEnvironment workerEnvironment; - private final LogConfigs logConfigs; - private final ConfigRepository configRepository; - - // See attemptJobSubmit() to understand the need for this Concurrent Set. 
- private final Set runningJobs = Sets.newConcurrentHashSet(); - - public JobSubmitter(final ExecutorService threadPool, - final JobPersistence persistence, - final TemporalWorkerRunFactory temporalWorkerRunFactory, - final JobTracker jobTracker, - final JobNotifier jobNotifier, - final WorkerEnvironment workerEnvironment, - final LogConfigs logConfigs, - final ConfigRepository configRepository) { - this.threadPool = threadPool; - this.persistence = persistence; - this.temporalWorkerRunFactory = temporalWorkerRunFactory; - this.jobTracker = jobTracker; - this.jobNotifier = jobNotifier; - this.workerEnvironment = workerEnvironment; - this.logConfigs = logConfigs; - this.configRepository = configRepository; - } - - @Override - public void run() { - try { - LOGGER.debug("Running job-submitter..."); - final var start = System.currentTimeMillis(); - - final Optional nextJob = persistence.getNextJob(); - - nextJob.ifPresent(attemptJobSubmit()); - - final var end = System.currentTimeMillis(); - LOGGER.debug("Completed Job-Submitter. Time taken: {} ms", end - start); - } catch (final Throwable e) { - LOGGER.error("Job Submitter Error", e); - } - } - - /** - * Since job submission and job execution happen in two separate thread pools, and job execution is - * what removes a job from the submission queue, it is possible for a job to be submitted multiple - * times. - *

- * This synchronised block guarantees only a single thread can utilise the concurrent set to decide whether a job should be submitted. The job id is added here, and removed in the finish block of {@link #submitJob(Job)}. - *

- * Since {@link JobPersistence#getNextJob()} returns the next queued job, this solution causes head-of-line blocking as the JobSubmitter tries to submit the same job. However, this suggests the Worker Pool needs more workers, and such blocking is inevitable when dealing with pending jobs. - *

- * See https://github.com/airbytehq/airbyte/issues/4378 for more info. - */ - synchronized private Consumer attemptJobSubmit() { - return job -> { - if (!runningJobs.contains(job.getId())) { - runningJobs.add(job.getId()); - trackSubmission(job); - submitJob(job); - final var pending = SchedulerApp.PENDING_JOBS.decrementAndGet(); - LOGGER.info("Job-Submitter Summary. Submitted job with scope {}", job.getScope()); - LOGGER.debug("Pending jobs: {}", pending); - } else { - LOGGER.info("Attempting to submit already running job {}. There are probably too many queued jobs.", job.getId()); - LOGGER.debug("Pending jobs: {}", SchedulerApp.PENDING_JOBS.get()); - } - }; - } - - @VisibleForTesting - void submitJob(final Job job) { - - final WorkerRun workerRun = temporalWorkerRunFactory.create(job); - // we need to know the attempt number before we begin the job lifecycle. thus we state what the - // attempt number should be. if it is not, that the lifecycle will fail. this should not happen as - // long as job submission for a single job is single threaded. this is a compromise to allow the job - // persistence to control what the attempt number should be while still allowing us to declare it - // before the lifecycle begins. - final int attemptNumber = job.getAttempts().size(); - threadPool.submit(new LifecycledCallable.Builder<>(workerRun) - .setOnStart(() -> { - // TODO(Issue-4204): This should save the fully qualified job path. - final Path logFilePath = workerRun.getJobRoot().resolve(LogClientSingleton.LOG_FILENAME); - final long persistedAttemptId = persistence.createAttempt(job.getId(), logFilePath); - assertSameIds(attemptNumber, persistedAttemptId); - LogClientSingleton.getInstance().setJobMdc(workerEnvironment, logConfigs, workerRun.getJobRoot()); - }) - .setOnSuccess(output -> { - LOGGER.debug("Job id {} succeeded", job.getId()); - if (output.getOutput().isPresent()) { - persistence.writeOutput(job.getId(), attemptNumber, output.getOutput().get()); - } - - if (output.getStatus() == io.airbyte.workers.JobStatus.SUCCEEDED) { - persistence.succeedAttempt(job.getId(), attemptNumber); - - if (job.getConfigType() == ConfigType.SYNC) { - final String connectionId = job.getScope(); - final StandardWorkspace workspace = configRepository.getStandardWorkspaceFromConnection(UUID.fromString(connectionId), false); - - if (workspace.getFirstCompletedSync() == null || !workspace.getFirstCompletedSync()) { - workspace.setFirstCompletedSync(true); - configRepository.writeStandardWorkspace(workspace); - } - - jobNotifier.successJob(job); - } - } else { - persistence.failAttempt(job.getId(), attemptNumber); - } - trackCompletion(job, output.getStatus()); - }) - .setOnException(e -> { - LOGGER.error("Exception thrown in Job Submission: ", e); - persistence.failAttempt(job.getId(), attemptNumber); - trackCompletion(job, io.airbyte.workers.JobStatus.FAILED); - }) - .setOnFinish(() -> { - runningJobs.remove(job.getId()); - LOGGER.debug("Job id {} cleared", job.getId()); - MDC.clear(); - }) - .build()); - } - - private void assertSameIds(final long expectedAttemptId, final long actualAttemptId) { - if (expectedAttemptId != actualAttemptId) { - throw new IllegalStateException("Created attempt was not the expected attempt"); - } - } - - private void trackSubmission(final Job job) { - jobTracker.trackSync(job, JobState.STARTED); - } - - private void trackCompletion(final Job job, final io.airbyte.workers.JobStatus status) { - jobTracker.trackSync(job, Enums.convertTo(status, JobState.class)); - } - -} diff --git 
a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/ScheduleJobPredicate.java b/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/ScheduleJobPredicate.java deleted file mode 100644 index b2e9ece2768a..000000000000 --- a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/ScheduleJobPredicate.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2022 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.scheduler.app; - -import io.airbyte.config.StandardSync; -import io.airbyte.config.helpers.ScheduleHelpers; -import io.airbyte.scheduler.models.Job; -import io.airbyte.scheduler.models.JobStatus; -import java.time.Instant; -import java.util.Optional; -import java.util.function.BiPredicate; -import java.util.function.Supplier; - -public class ScheduleJobPredicate implements BiPredicate<Optional<Job>, StandardSync> { - - private final Supplier<Instant> timeSupplier; - - public ScheduleJobPredicate(final Supplier<Instant> timeSupplier) { - this.timeSupplier = timeSupplier; - } - - @Override - public boolean test(final Optional<Job> previousJobOptional, final StandardSync standardSync) { - // if manual scheduler, then we never programmatically schedule. - if (standardSync.getManual()) { - return false; - } - - final boolean timeForNewJob = isTimeForNewJob(previousJobOptional, standardSync); - return shouldSchedule(previousJobOptional, timeForNewJob); - } - - @SuppressWarnings("OptionalUsedAsFieldOrParameterType") - private boolean shouldSchedule(final Optional<Job> previousJobOptional, final boolean timeForNewJob) { - if (previousJobOptional.isEmpty()) { - return true; - } - - final Job previousJob = previousJobOptional.get(); - return switch (previousJob.getStatus()) { - case CANCELLED, SUCCEEDED, FAILED -> timeForNewJob; - case INCOMPLETE, PENDING, RUNNING -> false; - default -> throw new IllegalArgumentException("Unrecognized status: " + previousJob.getStatus()); - }; - } - - @SuppressWarnings("OptionalUsedAsFieldOrParameterType") - private boolean isTimeForNewJob(final Optional<Job> previousJobOptional, final StandardSync standardSync) { - // if non-manual scheduler, and there has never been a previous run, always schedule. - if (previousJobOptional.isEmpty()) { - return true; - } - - final Job previousJob = previousJobOptional.get(); - - // if there is an active job, do not start a new one. - if (!JobStatus.TERMINAL_STATUSES.contains(previousJob.getStatus())) { - return false; - } - - final long prevRunStart = previousJob.getStartedAtInSecond().orElse(previousJob.getCreatedAtInSecond()); - final long nextRunStart = prevRunStart + ScheduleHelpers.getIntervalInSecond(standardSync.getSchedule()); - return nextRunStart < timeSupplier.get().getEpochSecond(); - } - -} diff --git a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java b/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java deleted file mode 100644 index 10da65adc0ab..000000000000 --- a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java +++ /dev/null @@ -1,308 +0,0 @@ -/* - * Copyright (c) 2022 Airbyte, Inc., all rights reserved.
- */ - -package io.airbyte.scheduler.app; - -import com.google.common.util.concurrent.ThreadFactoryBuilder; -import io.airbyte.analytics.Deployment; -import io.airbyte.analytics.TrackingClient; -import io.airbyte.analytics.TrackingClientSingleton; -import io.airbyte.api.client.AirbyteApiClient; -import io.airbyte.api.client.invoker.generated.ApiClient; -import io.airbyte.api.client.invoker.generated.ApiException; -import io.airbyte.api.client.model.generated.HealthCheckRead; -import io.airbyte.commons.concurrency.GracefulShutdownHandler; -import io.airbyte.commons.features.EnvVariableFeatureFlags; -import io.airbyte.commons.features.FeatureFlags; -import io.airbyte.commons.lang.CloseableShutdownHook; -import io.airbyte.commons.version.AirbyteVersion; -import io.airbyte.config.Configs; -import io.airbyte.config.Configs.WorkerEnvironment; -import io.airbyte.config.EnvConfigs; -import io.airbyte.config.helpers.LogClientSingleton; -import io.airbyte.config.helpers.LogConfigs; -import io.airbyte.config.persistence.ConfigPersistence; -import io.airbyte.config.persistence.ConfigRepository; -import io.airbyte.config.persistence.DatabaseConfigPersistence; -import io.airbyte.config.persistence.split_secrets.JsonSecretsProcessor; -import io.airbyte.db.Database; -import io.airbyte.db.factory.DSLContextFactory; -import io.airbyte.db.factory.DataSourceFactory; -import io.airbyte.metrics.lib.DatadogClientConfiguration; -import io.airbyte.metrics.lib.DogStatsDMetricSingleton; -import io.airbyte.metrics.lib.MetricEmittingApps; -import io.airbyte.scheduler.models.Job; -import io.airbyte.scheduler.models.JobStatus; -import io.airbyte.scheduler.persistence.DefaultJobPersistence; -import io.airbyte.scheduler.persistence.JobNotifier; -import io.airbyte.scheduler.persistence.JobPersistence; -import io.airbyte.scheduler.persistence.WorkspaceHelper; -import io.airbyte.scheduler.persistence.job_tracker.JobTracker; -import io.airbyte.workers.WorkerConfigs; -import io.airbyte.workers.run.TemporalWorkerRunFactory; -import io.airbyte.workers.temporal.TemporalClient; -import java.io.IOException; -import java.nio.file.Path; -import java.time.Duration; -import java.time.Instant; -import java.util.Map; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.ThreadFactory; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; -import javax.sql.DataSource; -import org.jooq.DSLContext; -import org.jooq.SQLDialect; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.slf4j.MDC; - -/** - * The SchedulerApp is responsible for finding new scheduled jobs that need to be run and to launch - * them. The current implementation uses two thread pools to do so. One pool is responsible for all - * job launching operations. The other pool is responsible for clean up operations. - *

- * Operations can have thread pools under the hood. An important thread pool to note is that the job - * submitter thread pool. This pool does the work of submitting jobs to temporal - the size of this - * pool determines the number of concurrent jobs that can be run. This is controlled via the - * SUBMITTER_NUM_THREADS variable of EnvConfigs. - */ -public class SchedulerApp { - - public static final AtomicInteger PENDING_JOBS = new AtomicInteger(); - - private static final Logger LOGGER = LoggerFactory.getLogger(SchedulerApp.class); - - private static final long GRACEFUL_SHUTDOWN_SECONDS = 30; - private static final Duration SCHEDULING_DELAY = Duration.ofSeconds(5); - private static final Duration CLEANING_DELAY = Duration.ofHours(2); - private static final ThreadFactory THREAD_FACTORY = new ThreadFactoryBuilder().setNameFormat("worker-%d").build(); - private static final String DRIVER_CLASS_NAME = "org.postgresql.Driver"; - - private final Path workspaceRoot; - private final JobPersistence jobPersistence; - private final ConfigRepository configRepository; - private final JobCleaner jobCleaner; - private final JobNotifier jobNotifier; - private final TemporalClient temporalClient; - private final int submitterNumThreads; - private final int maxSyncJobAttempts; - private final String airbyteVersionOrWarnings; - private final WorkerEnvironment workerEnvironment; - private final LogConfigs logConfigs; - - public SchedulerApp(final Path workspaceRoot, - final JobPersistence jobPersistence, - final ConfigRepository configRepository, - final JobCleaner jobCleaner, - final JobNotifier jobNotifier, - final TemporalClient temporalClient, - final Integer submitterNumThreads, - final Integer maxSyncJobAttempts, - final String airbyteVersionOrWarnings, - final WorkerEnvironment workerEnvironment, - final LogConfigs logConfigs) { - this.workspaceRoot = workspaceRoot; - this.jobPersistence = jobPersistence; - this.configRepository = configRepository; - this.jobCleaner = jobCleaner; - this.jobNotifier = jobNotifier; - this.temporalClient = temporalClient; - this.submitterNumThreads = submitterNumThreads; - this.maxSyncJobAttempts = maxSyncJobAttempts; - this.airbyteVersionOrWarnings = airbyteVersionOrWarnings; - this.workerEnvironment = workerEnvironment; - this.logConfigs = logConfigs; - } - - public void start() throws IOException { - final Configs configs = new EnvConfigs(); - final WorkerConfigs workerConfigs = new WorkerConfigs(configs); - final FeatureFlags featureFlags = new EnvVariableFeatureFlags(); - if (!featureFlags.usesNewScheduler()) { - final ExecutorService workerThreadPool = Executors.newFixedThreadPool(submitterNumThreads, THREAD_FACTORY); - final ScheduledExecutorService scheduleJobsPool = Executors.newSingleThreadScheduledExecutor(); - final ScheduledExecutorService executeJobsPool = Executors.newSingleThreadScheduledExecutor(); - final ScheduledExecutorService cleanupJobsPool = Executors.newSingleThreadScheduledExecutor(); - final TemporalWorkerRunFactory temporalWorkerRunFactory = new TemporalWorkerRunFactory( - temporalClient, - workspaceRoot, - airbyteVersionOrWarnings, - featureFlags); - final JobRetrier jobRetrier = new JobRetrier(jobPersistence, Instant::now, jobNotifier, maxSyncJobAttempts); - final TrackingClient trackingClient = TrackingClientSingleton.get(); - final JobScheduler jobScheduler = new JobScheduler( - configs.connectorSpecificResourceDefaultsEnabled(), - jobPersistence, - configRepository, - trackingClient, - workerConfigs); - final JobSubmitter jobSubmitter = 
new JobSubmitter( - workerThreadPool, - jobPersistence, - temporalWorkerRunFactory, - new JobTracker(configRepository, jobPersistence, trackingClient), - jobNotifier, workerEnvironment, logConfigs, configRepository); - - final Map mdc = MDC.getCopyOfContextMap(); - - // We cancel jobs that where running before the restart. They are not being monitored by the worker - // anymore. - cleanupZombies(jobPersistence, jobNotifier); - - LOGGER.info("Start running the old scheduler"); - scheduleJobsPool.scheduleWithFixedDelay( - () -> { - MDC.setContextMap(mdc); - jobRetrier.run(); - jobScheduler.run(); - }, - 0L, - SCHEDULING_DELAY.toSeconds(), - TimeUnit.SECONDS); - - executeJobsPool.scheduleWithFixedDelay( - () -> { - MDC.setContextMap(mdc); - jobSubmitter.run(); - }, - 0L, - SCHEDULING_DELAY.toSeconds(), - TimeUnit.SECONDS); - - cleanupJobsPool.scheduleWithFixedDelay( - () -> { - MDC.setContextMap(mdc); - jobCleaner.run(); - jobPersistence.purgeJobHistory(); - }, - CLEANING_DELAY.toSeconds(), - CLEANING_DELAY.toSeconds(), - TimeUnit.SECONDS); - - Runtime.getRuntime().addShutdownHook(new GracefulShutdownHandler(Duration.ofSeconds(GRACEFUL_SHUTDOWN_SECONDS), workerThreadPool, - scheduleJobsPool, executeJobsPool, cleanupJobsPool)); - } - } - - private void cleanupZombies(final JobPersistence jobPersistence, final JobNotifier jobNotifier) throws IOException { - for (final Job zombieJob : jobPersistence.listJobsWithStatus(JobStatus.RUNNING)) { - jobNotifier.failJob("zombie job was failed", zombieJob); - - final int currentAttemptNumber = zombieJob.getAttemptsCount() - 1; - - LOGGER.warn( - "zombie clean up - job attempt was failed. job id: {}, attempt number: {}, type: {}, scope: {}", - zombieJob.getId(), - currentAttemptNumber, - zombieJob.getConfigType(), - zombieJob.getScope()); - - jobPersistence.failAttempt( - zombieJob.getId(), - currentAttemptNumber); - } - } - - public static void waitForServer(final Configs configs) throws InterruptedException { - final AirbyteApiClient apiClient = new AirbyteApiClient( - new ApiClient().setScheme("http") - .setHost(configs.getAirbyteApiHost()) - .setPort(configs.getAirbyteApiPort()) - .setBasePath("/api")); - - boolean isHealthy = false; - while (!isHealthy) { - try { - final HealthCheckRead healthCheck = apiClient.getHealthApi().getHealthCheck(); - isHealthy = healthCheck.getAvailable(); - } catch (final ApiException e) { - LOGGER.info("Waiting for server to become available..."); - Thread.sleep(2000); - } - } - } - - public static void main(final String[] args) throws IOException, InterruptedException { - - final Configs configs = new EnvConfigs(); - - LogClientSingleton.getInstance().setWorkspaceMdc(configs.getWorkerEnvironment(), configs.getLogConfigs(), - LogClientSingleton.getInstance().getSchedulerLogsRoot(configs.getWorkspaceRoot())); - - final Path workspaceRoot = configs.getWorkspaceRoot(); - LOGGER.info("workspaceRoot = " + workspaceRoot); - - final String temporalHost = configs.getTemporalHost(); - LOGGER.info("temporalHost = " + temporalHost); - - final DataSource configsDataSource = DataSourceFactory.create(configs.getConfigDatabaseUser(), configs.getConfigDatabasePassword(), - DRIVER_CLASS_NAME, configs.getConfigDatabaseUrl()); - - final DataSource jobsDataSource = DataSourceFactory.create(configs.getDatabaseUser(), configs.getDatabasePassword(), - DRIVER_CLASS_NAME, configs.getDatabaseUrl()); - - // Manual configuration that will be replaced by Dependency Injection in the future - try (final DSLContext configsDslContext = 
DSLContextFactory.create(configsDataSource, SQLDialect.POSTGRES); - final DSLContext jobsDslContext = DSLContextFactory.create(jobsDataSource, SQLDialect.POSTGRES)) { - - // Ensure that the database resources are closed on application shutdown - CloseableShutdownHook.registerRuntimeShutdownHook(configsDataSource, jobsDataSource, configsDslContext, jobsDslContext); - - // Wait for the server to initialize the database and run migration - // This should be converted into check for the migration version. Everything else as per. - waitForServer(configs); - LOGGER.info("Creating Job DB connection pool..."); - final Database jobDatabase = new Database(jobsDslContext); - final Database configDatabase = new Database(configsDslContext); - final FeatureFlags featureFlags = new EnvVariableFeatureFlags(); - final JsonSecretsProcessor jsonSecretsProcessor = JsonSecretsProcessor.builder() - .maskSecrets(!featureFlags.exposeSecretsInExport()) - .copySecrets(true) - .build(); - final ConfigPersistence configPersistence = DatabaseConfigPersistence.createWithValidation(configDatabase, jsonSecretsProcessor); - final ConfigRepository configRepository = new ConfigRepository(configPersistence, configDatabase); - - final JobPersistence jobPersistence = new DefaultJobPersistence(jobDatabase); - final JobCleaner jobCleaner = new JobCleaner( - configs.getWorkspaceRetentionConfig(), - workspaceRoot, - jobPersistence); - AirbyteVersion.assertIsCompatible( - configs.getAirbyteVersion(), - jobPersistence.getVersion().map(AirbyteVersion::new).orElseThrow()); - - TrackingClientSingleton.initialize( - configs.getTrackingStrategy(), - new Deployment(configs.getDeploymentMode(), jobPersistence.getDeployment().orElseThrow(), configs.getWorkerEnvironment()), - configs.getAirbyteRole(), - configs.getAirbyteVersion(), - configRepository); - final JobNotifier jobNotifier = new JobNotifier( - configs.getWebappUrl(), - configRepository, - new WorkspaceHelper(configRepository, jobPersistence), - TrackingClientSingleton.get()); - final TemporalClient temporalClient = TemporalClient.production(temporalHost, workspaceRoot, configs); - - DogStatsDMetricSingleton.initialize(MetricEmittingApps.SCHEDULER, new DatadogClientConfiguration(configs)); - - LOGGER.info("Launching scheduler..."); - new SchedulerApp( - workspaceRoot, - jobPersistence, - configRepository, - jobCleaner, - jobNotifier, - temporalClient, - Integer.parseInt(configs.getSubmitterNumThreads()), - configs.getSyncJobMaxAttempts(), - configs.getAirbyteVersionOrWarning(), configs.getWorkerEnvironment(), configs.getLogConfigs()) - .start(); - } - } - -} diff --git a/airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/JobCleanerTest.java b/airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/JobCleanerTest.java deleted file mode 100644 index fd56bb251d36..000000000000 --- a/airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/JobCleanerTest.java +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.scheduler.app; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -import io.airbyte.config.WorkspaceRetentionConfig; -import io.airbyte.scheduler.models.Job; -import io.airbyte.scheduler.models.JobStatus; -import io.airbyte.scheduler.persistence.JobPersistence; -import java.io.File; -import java.io.IOException; -import java.io.RandomAccessFile; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.List; -import java.util.Set; -import java.util.stream.Collectors; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; - -class JobCleanerTest { - - @TempDir - Path folder; - - @Test - public void testNotDeletingFilesInMinimum() throws IOException { - createFile(folder.resolve("1"), "A", 1, 10); - - final JobPersistence jobPersistence = mock(JobPersistence.class); - - final JobCleaner jobCleaner = new JobCleaner( - new WorkspaceRetentionConfig(20, 30, 0), - folder, - jobPersistence); - - final Set before = listFiles(folder); - jobCleaner.run(); - final Set after = listFiles(folder); - - assertFalse(before.isEmpty()); - assertEquals(before, after); - } - - @Test - public void testDeletingOldFiles() throws IOException { - createFile(folder.resolve("1"), "A", 1, 100); - - final JobPersistence jobPersistence = mock(JobPersistence.class); - - final JobCleaner jobCleaner = new JobCleaner( - new WorkspaceRetentionConfig(20, 30, 0), - folder, - jobPersistence); - - final Set before = listFiles(folder); - jobCleaner.run(); - final Set after = listFiles(folder); - - final Set expected = Set.of(""); - - assertFalse(before.isEmpty()); - assertEquals(expected, after); - } - - @Test - public void testDeletingLargeFiles() throws IOException { - createFile(folder.resolve("1"), "A", 1, 10); - createFile(folder.resolve("1"), "B", 1, 10); - createFile(folder.resolve("1"), "C", 1, 10); - createFile(folder.resolve("2"), "D", 1, 18); - createFile(folder.resolve("2"), "E", 1, 19); - createFile(folder.resolve("2"), "F", 1, 20); - - final JobPersistence jobPersistence = mock(JobPersistence.class); - - final JobCleaner jobCleaner = new JobCleaner( - new WorkspaceRetentionConfig(1, 30, 4), - folder, - jobPersistence); - - jobCleaner.run(); - final Set after = listFiles(folder); - final Set expected = Set.of("", "/1", "/1/A", "/1/B", "/1/C", "/2", "/2/D"); - - assertEquals(expected, after); - } - - @Test - public void testNotDeletingRunning() throws IOException { - createFile(folder.resolve("1"), "A", 1, 10); - createFile(folder.resolve("1"), "B", 1, 10); - createFile(folder.resolve("1"), "C", 1, 10); - createFile(folder.resolve("2"), "D", 1, 18); - createFile(folder.resolve("2"), "E", 1, 19); - createFile(folder.resolve("2"), "F", 1, 20); - - final JobPersistence jobPersistence = mock(JobPersistence.class); - final Job job2 = mock(Job.class); - when(job2.getId()).thenReturn(2L); - when(jobPersistence.listJobsWithStatus(JobStatus.RUNNING)).thenReturn(List.of(job2)); - - final JobCleaner jobCleaner = new JobCleaner( - new WorkspaceRetentionConfig(1, 30, 0), - folder, - jobPersistence); - - jobCleaner.run(); - final Set after = listFiles(folder); - final Set expected = Set.of("", "/2", "/2/D", "/2/E", "/2/F"); - - assertEquals(expected, after); - } - - private void createFile(final Path subdirectory, final String filename, final int sizeMb, final int daysAgo) throws IOException { - final long 
lastModified = JobCleaner.getDateFromDaysAgo(daysAgo).getTime(); - final File subdirFile = subdirectory.toFile(); - if (!subdirFile.exists()) { - subdirFile.mkdir(); - subdirFile.setLastModified(lastModified); - } - - final File file = subdirectory.resolve(filename).toFile(); - file.createNewFile(); - - final RandomAccessFile raf = new RandomAccessFile(file, "rw"); - raf.setLength(sizeMb * 1024 * 1024); - raf.close(); - - file.setLastModified(lastModified); - } - - private Set listFiles(final Path dir) throws IOException { - return Files.walk(dir).map(Path::toString).map(x -> x.replace(folder.toString(), "")).collect(Collectors.toSet()); - } - -} diff --git a/airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/JobLogsTest.java b/airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/JobLogsTest.java deleted file mode 100644 index c848e222e96e..000000000000 --- a/airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/JobLogsTest.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright (c) 2022 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.scheduler.app; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import org.junit.jupiter.api.Test; - -class JobLogsTest { - - @Test - public void testGetLogDirectory() { - final String actual = JobLogs.getLogDirectory("blah"); - final String expected = "logs/jobs/blah"; - assertEquals(expected, actual); - } - -} diff --git a/airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/JobRetrierTest.java b/airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/JobRetrierTest.java deleted file mode 100644 index d4b485549580..000000000000 --- a/airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/JobRetrierTest.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.scheduler.app; - -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.verifyNoMoreInteractions; -import static org.mockito.Mockito.when; - -import io.airbyte.config.JobConfig; -import io.airbyte.scheduler.models.Job; -import io.airbyte.scheduler.models.JobStatus; -import io.airbyte.scheduler.persistence.JobNotifier; -import io.airbyte.scheduler.persistence.JobPersistence; -import java.io.IOException; -import java.time.Duration; -import java.time.Instant; -import java.util.List; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -class JobRetrierTest { - - private static final Instant NOW = Instant.now(); - - private JobNotifier jobNotifier; - private JobPersistence persistence; - private JobRetrier jobRetrier; - private Job incompleteSyncJob; - private Job incompleteSpecJob; - - @BeforeEach - void setup() throws IOException { - jobNotifier = mock(JobNotifier.class); - persistence = mock(JobPersistence.class); - - jobRetrier = new JobRetrier(persistence, () -> NOW, jobNotifier, 3); - incompleteSyncJob = mock(Job.class); - when(incompleteSyncJob.getId()).thenReturn(12L); - when(incompleteSyncJob.getStatus()).thenReturn(JobStatus.INCOMPLETE); - when(incompleteSyncJob.getConfigType()).thenReturn(JobConfig.ConfigType.SYNC); - - incompleteSpecJob = mock(Job.class); - when(incompleteSpecJob.getId()).thenReturn(42L); - when(incompleteSpecJob.getStatus()).thenReturn(JobStatus.INCOMPLETE); - when(incompleteSpecJob.getConfigType()).thenReturn(JobConfig.ConfigType.GET_SPEC); - } - - @Test - void testSyncJobTimeToRetry() throws IOException { - when(persistence.listJobsWithStatus(JobStatus.INCOMPLETE)).thenReturn(List.of(incompleteSyncJob)); - when(incompleteSyncJob.getAttemptsCount()).thenReturn(1); - when(incompleteSyncJob.getUpdatedAtInSecond()).thenReturn(NOW.minus(Duration.ofMinutes(2)).getEpochSecond()); - - jobRetrier.run(); - - verify(persistence).listJobsWithStatus(JobStatus.INCOMPLETE); - verify(persistence).resetJob(incompleteSyncJob.getId()); - verifyNoMoreInteractions(persistence); - } - - @Test - void testToSoonToRetry() throws IOException { - when(persistence.listJobsWithStatus(JobStatus.INCOMPLETE)).thenReturn(List.of(incompleteSyncJob)); - when(incompleteSyncJob.getAttemptsCount()).thenReturn(1); - when(incompleteSyncJob.getUpdatedAtInSecond()).thenReturn(NOW.minus(Duration.ofSeconds(10)).getEpochSecond()); - - jobRetrier.run(); - - verify(persistence).listJobsWithStatus(JobStatus.INCOMPLETE); - verifyNoMoreInteractions(persistence); - } - - @Test - void testTooManySyncJobFailures() throws IOException { - when(persistence.listJobsWithStatus(JobStatus.INCOMPLETE)).thenReturn(List.of(incompleteSyncJob)); - when(incompleteSyncJob.getAttemptsCount()).thenReturn(5); - when(incompleteSyncJob.getUpdatedAtInSecond()).thenReturn(NOW.minus(Duration.ofMinutes(2)).getEpochSecond()); - - jobRetrier.run(); - - verify(persistence).listJobsWithStatus(JobStatus.INCOMPLETE); - verify(persistence).failJob(incompleteSyncJob.getId()); - verifyNoMoreInteractions(persistence); - } - - @Test - void testSpecJobFailure() throws IOException { - when(persistence.listJobsWithStatus(JobStatus.INCOMPLETE)).thenReturn(List.of(incompleteSpecJob)); - when(incompleteSpecJob.getAttemptsCount()).thenReturn(1); - - jobRetrier.run(); - - verify(persistence).listJobsWithStatus(JobStatus.INCOMPLETE); - verify(persistence).failJob(incompleteSpecJob.getId()); - verifyNoMoreInteractions(persistence); - } - -} 
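The JobRetrier exercised by the tests above retried replication jobs after a fixed RETRY_WAIT_MINUTES delay, and its shouldRetry() carried the note "todo (cgardens) - use exponential backoff." A minimal sketch of that suggested variant, keeping the same timeSupplier and epoch-second comparison shape; the BackoffPolicy class and its constants are illustrative, not part of the codebase:

```java
import java.time.Instant;
import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;

// Illustrative exponential-backoff variant of the removed JobRetrier.shouldRetry();
// the class name and the wait constants are hypothetical, not from the Airbyte tree.
public class BackoffPolicy {

  private static final long BASE_WAIT_MINUTES = 1; // wait after the first failed attempt
  private static final long MAX_WAIT_MINUTES = 60; // cap so the wait cannot grow unbounded

  private final Supplier<Instant> timeSupplier;

  public BackoffPolicy(final Supplier<Instant> timeSupplier) {
    this.timeSupplier = timeSupplier;
  }

  // attemptsCount stands in for job.getAttemptsCount(), and lastUpdatedInSecond
  // for job.getUpdatedAtInSecond() in the removed implementation.
  public boolean shouldRetry(final int attemptsCount, final long lastUpdatedInSecond) {
    // Double the wait per failed attempt (1, 2, 4, 8, ... minutes), clamped to the cap.
    final int exponent = Math.min(Math.max(attemptsCount - 1, 0), 6);
    final long waitMinutes = Math.min(BASE_WAIT_MINUTES << exponent, MAX_WAIT_MINUTES);
    return lastUpdatedInSecond < timeSupplier.get().getEpochSecond() - TimeUnit.MINUTES.toSeconds(waitMinutes);
  }

  public static void main(final String[] args) {
    final BackoffPolicy policy = new BackoffPolicy(Instant::now);
    final long threeMinutesAgo = Instant.now().minusSeconds(180).getEpochSecond();
    System.out.println(policy.shouldRetry(1, threeMinutesAgo)); // true: the 1-minute wait has elapsed
    System.out.println(policy.shouldRetry(4, threeMinutesAgo)); // false: the 8-minute wait has not
  }
}
```

Dropping this into the old retrier would only have meant replacing the fixed TimeUnit.MINUTES.toSeconds(RETRY_WAIT_MINUTES) term with the computed wait; the Temporal-based scheduler that this patch migrates to moves retry decisions into its workflows instead.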
diff --git a/airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/JobSchedulerTest.java b/airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/JobSchedulerTest.java deleted file mode 100644 index 2ddead58803b..000000000000 --- a/airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/JobSchedulerTest.java +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Copyright (c) 2022 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.scheduler.app; - -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.never; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import io.airbyte.commons.json.Jsons; -import io.airbyte.config.JobSyncConfig.NamespaceDefinitionType; -import io.airbyte.config.StandardSync; -import io.airbyte.config.StandardSync.Status; -import io.airbyte.config.StandardSyncOperation; -import io.airbyte.config.persistence.ConfigNotFoundException; -import io.airbyte.config.persistence.ConfigRepository; -import io.airbyte.protocol.models.CatalogHelpers; -import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; -import io.airbyte.protocol.models.ConfiguredAirbyteStream; -import io.airbyte.protocol.models.Field; -import io.airbyte.protocol.models.JsonSchemaType; -import io.airbyte.scheduler.models.Job; -import io.airbyte.scheduler.persistence.JobPersistence; -import io.airbyte.scheduler.persistence.job_factory.SyncJobFactory; -import io.airbyte.validation.json.JsonValidationException; -import java.io.IOException; -import java.util.Collections; -import java.util.List; -import java.util.Optional; -import java.util.UUID; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -class JobSchedulerTest { - - private static final StandardSync STANDARD_SYNC; - private static final List STANDARD_SYNC_OPERATIONS; - private static final long JOB_ID = 12L; - private Job previousJob; - - private static final String STREAM_NAME = "users"; - private static final String FIELD_NAME = "id"; - - static { - final UUID sourceId = UUID.randomUUID(); - - final UUID destinationId = UUID.randomUUID(); - - final ConfiguredAirbyteStream stream = new ConfiguredAirbyteStream() - .withStream(CatalogHelpers.createAirbyteStream(STREAM_NAME, Field.of(FIELD_NAME, JsonSchemaType.STRING))); - final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(Collections.singletonList(stream)); - - final UUID connectionId = UUID.randomUUID(); - final UUID operationId = UUID.randomUUID(); - - STANDARD_SYNC = new StandardSync() - .withConnectionId(connectionId) - .withName("presto to hudi") - .withNamespaceDefinition(NamespaceDefinitionType.SOURCE) - .withNamespaceFormat(null) - .withPrefix("presto_to_hudi") - .withStatus(StandardSync.Status.ACTIVE) - .withCatalog(catalog) - .withSourceId(sourceId) - .withDestinationId(destinationId) - .withOperationIds(List.of(operationId)); - - // empty. contents not needed for any of these unit tests. 
- STANDARD_SYNC_OPERATIONS = List.of(new StandardSyncOperation().withOperationId(operationId)); - } - - private ConfigRepository configRepository; - private JobPersistence jobPersistence; - private ScheduleJobPredicate scheduleJobPredicate; - private SyncJobFactory jobFactory; - private JobScheduler scheduler; - - @BeforeEach - public void setup() { - configRepository = mock(ConfigRepository.class); - jobPersistence = mock(JobPersistence.class); - - scheduleJobPredicate = mock(ScheduleJobPredicate.class); - jobFactory = mock(SyncJobFactory.class); - scheduler = new JobScheduler(jobPersistence, configRepository, scheduleJobPredicate, jobFactory); - - previousJob = mock(Job.class); - } - - @Test - public void testScheduleJob() throws JsonValidationException, ConfigNotFoundException, IOException { - when(jobPersistence.getLastReplicationJob(STANDARD_SYNC.getConnectionId())) - .thenReturn(java.util.Optional.of(previousJob)); - when(scheduleJobPredicate.test(Optional.of(previousJob), STANDARD_SYNC)).thenReturn(true); - when(jobFactory.create(STANDARD_SYNC.getConnectionId())).thenReturn(JOB_ID); - setConfigMocks(); - - scheduler.run(); - - verifyConfigCalls(); - verify(scheduleJobPredicate).test(Optional.of(previousJob), STANDARD_SYNC); - verify(jobPersistence).getLastReplicationJob(STANDARD_SYNC.getConnectionId()); - verify(jobFactory).create(STANDARD_SYNC.getConnectionId()); - } - - @Test - public void testScheduleJobNoPreviousJob() throws JsonValidationException, ConfigNotFoundException, IOException { - when(jobPersistence.getLastReplicationJob(STANDARD_SYNC.getConnectionId())) - .thenReturn(java.util.Optional.empty()); - when(scheduleJobPredicate.test(Optional.empty(), STANDARD_SYNC)).thenReturn(true); - when(jobFactory.create(STANDARD_SYNC.getConnectionId())).thenReturn(JOB_ID); - setConfigMocks(); - - scheduler.run(); - - verifyConfigCalls(); - verify(scheduleJobPredicate).test(Optional.empty(), STANDARD_SYNC); - verify(jobPersistence).getLastReplicationJob(STANDARD_SYNC.getConnectionId()); - verify(jobFactory).create(STANDARD_SYNC.getConnectionId()); - } - - @Test - public void testDoNotScheduleJob() throws JsonValidationException, ConfigNotFoundException, IOException { - when(jobPersistence.getLastReplicationJob(STANDARD_SYNC.getConnectionId())) - .thenReturn(java.util.Optional.of(previousJob)); - when(scheduleJobPredicate.test(Optional.of(previousJob), STANDARD_SYNC)).thenReturn(false); - setConfigMocks(); - - scheduler.run(); - - verifyConfigCalls(); - verify(scheduleJobPredicate).test(Optional.of(previousJob), STANDARD_SYNC); - verify(jobPersistence).getLastReplicationJob(STANDARD_SYNC.getConnectionId()); - verify(jobFactory, never()).create(STANDARD_SYNC.getConnectionId()); - } - - @Test - public void testDoesNotScheduleNonActiveConnections() throws JsonValidationException, ConfigNotFoundException, IOException { - final StandardSync standardSync = Jsons.clone(STANDARD_SYNC); - standardSync.setStatus(Status.INACTIVE); - when(configRepository.listStandardSyncs()).thenReturn(Collections.singletonList(standardSync)); - - scheduler.run(); - - verify(configRepository).listStandardSyncs(); - verify(scheduleJobPredicate, never()).test(Optional.of(previousJob), STANDARD_SYNC); - verify(jobPersistence, never()).getLastReplicationJob(standardSync.getConnectionId()); - verify(jobFactory, never()).create(standardSync.getConnectionId()); - } - - // sets all mocks that are related to fetching configs. these are the same for all tests in this - // test suite. 
- private void setConfigMocks() throws JsonValidationException, ConfigNotFoundException, IOException { - when(configRepository.listStandardSyncs()).thenReturn(Collections.singletonList(STANDARD_SYNC)); - } - - // verify all mocks that are related to fetching configs are called. these are the same for all - // tests in this test suite. - private void verifyConfigCalls() throws ConfigNotFoundException, IOException, JsonValidationException { - verify(configRepository).listStandardSyncs(); - } - -} diff --git a/airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/JobSubmitterTest.java b/airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/JobSubmitterTest.java deleted file mode 100644 index b453ea928c2b..000000000000 --- a/airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/JobSubmitterTest.java +++ /dev/null @@ -1,332 +0,0 @@ -/* - * Copyright (c) 2022 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.scheduler.app; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.anyInt; -import static org.mockito.ArgumentMatchers.anyLong; -import static org.mockito.ArgumentMatchers.eq; -import static org.mockito.Mockito.RETURNS_DEEP_STUBS; -import static org.mockito.Mockito.atLeast; -import static org.mockito.Mockito.doNothing; -import static org.mockito.Mockito.doReturn; -import static org.mockito.Mockito.doThrow; -import static org.mockito.Mockito.inOrder; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.never; -import static org.mockito.Mockito.spy; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.verifyNoInteractions; -import static org.mockito.Mockito.when; - -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Lists; -import com.google.common.util.concurrent.MoreExecutors; -import io.airbyte.config.Configs.WorkerEnvironment; -import io.airbyte.config.JobConfig.ConfigType; -import io.airbyte.config.JobOutput; -import io.airbyte.config.StandardWorkspace; -import io.airbyte.config.helpers.LogClientSingleton; -import io.airbyte.config.helpers.LogConfigs; -import io.airbyte.config.persistence.ConfigRepository; -import io.airbyte.scheduler.models.Job; -import io.airbyte.scheduler.persistence.JobNotifier; -import io.airbyte.scheduler.persistence.JobPersistence; -import io.airbyte.scheduler.persistence.job_tracker.JobTracker; -import io.airbyte.scheduler.persistence.job_tracker.JobTracker.JobState; -import io.airbyte.workers.JobStatus; -import io.airbyte.workers.OutputAndStatus; -import io.airbyte.workers.run.TemporalWorkerRunFactory; -import io.airbyte.workers.run.WorkerRun; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.Map; -import java.util.Optional; -import java.util.UUID; -import java.util.concurrent.Executors; -import java.util.concurrent.atomic.AtomicReference; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Nested; -import org.junit.jupiter.api.Test; -import org.mockito.InOrder; -import org.mockito.Mockito; -import org.slf4j.MDC; - -public class JobSubmitterTest { - - private static final OutputAndStatus SUCCESS_OUTPUT = new OutputAndStatus<>(JobStatus.SUCCEEDED, new JobOutput()); - private static final OutputAndStatus FAILED_OUTPUT = new OutputAndStatus<>(JobStatus.FAILED); - private static final long JOB_ID = 1L; - private static 
final int ATTEMPT_NUMBER = 12; - - private JobPersistence persistence; - private TemporalWorkerRunFactory workerRunFactory; - private WorkerRun workerRun; - private Job job; - private Path logPath; - - private JobSubmitter jobSubmitter; - private JobTracker jobTracker; - private JobNotifier jobNotifier; - private ConfigRepository configRepository; - - @BeforeEach - public void setup() throws IOException { - job = mock(Job.class, RETURNS_DEEP_STUBS); - jobTracker = mock(JobTracker.class); - when(job.getId()).thenReturn(JOB_ID); - when(job.getAttempts().size()).thenReturn(ATTEMPT_NUMBER); - - workerRun = mock(WorkerRun.class); - final Path jobRoot = Files.createTempDirectory("test"); - final Path logPath = jobRoot.resolve(LogClientSingleton.LOG_FILENAME); - when(workerRun.getJobRoot()).thenReturn(jobRoot); - workerRunFactory = mock(TemporalWorkerRunFactory.class); - when(workerRunFactory.create(job)).thenReturn(workerRun); - - persistence = mock(JobPersistence.class); - this.logPath = jobRoot.resolve(LogClientSingleton.LOG_FILENAME); - when(persistence.getNextJob()).thenReturn(Optional.of(job)); - when(persistence.createAttempt(JOB_ID, logPath)).thenReturn(ATTEMPT_NUMBER); - jobNotifier = mock(JobNotifier.class); - - configRepository = mock(ConfigRepository.class); - - jobSubmitter = spy(new JobSubmitter( - MoreExecutors.newDirectExecutorService(), - persistence, - workerRunFactory, - jobTracker, - jobNotifier, - WorkerEnvironment.DOCKER, - LogConfigs.EMPTY, - configRepository)); - } - - @Test - public void testRun() { - doNothing().when(jobSubmitter).submitJob(any()); - - jobSubmitter.run(); - - verify(jobTracker).trackSync(job, JobState.STARTED); - verify(jobSubmitter).submitJob(job); - } - - @Test - public void testPersistenceNoJob() throws Exception { - doReturn(Optional.empty()).when(persistence).getNextJob(); - - jobSubmitter.run(); - - verifyNoInteractions(workerRunFactory); - verify(jobTracker, never()).trackSync(any(), any()); - } - - @Test - public void testSuccess() throws Exception { - doReturn(SUCCESS_OUTPUT).when(workerRun).call(); - - final StandardWorkspace completedSyncWorkspace = new StandardWorkspace() - .withFirstCompletedSync(true); - final StandardWorkspace nonCompletedSyncWorkspace = new StandardWorkspace() - .withFirstCompletedSync(false); - - when(configRepository.listStandardWorkspaces(false)) - .thenReturn(Lists.newArrayList(completedSyncWorkspace, nonCompletedSyncWorkspace)); - - jobSubmitter.submitJob(job); - - final InOrder inOrder = inOrder(persistence, jobSubmitter); - inOrder.verify(persistence).createAttempt(JOB_ID, logPath); - inOrder.verify(persistence).writeOutput(JOB_ID, ATTEMPT_NUMBER, new JobOutput()); - inOrder.verify(persistence).succeedAttempt(JOB_ID, ATTEMPT_NUMBER); - verify(jobTracker).trackSync(job, JobState.SUCCEEDED); - inOrder.verifyNoMoreInteractions(); - } - - @Test - public void testSuccessCompleteWorkspace() throws Exception { - doReturn(SUCCESS_OUTPUT).when(workerRun).call(); - - final StandardWorkspace completedSyncWorkspace = new StandardWorkspace() - .withFirstCompletedSync(true); - final StandardWorkspace nonCompletedSyncWorkspace = new StandardWorkspace() - .withFirstCompletedSync(false); - - when(configRepository.getStandardWorkspaceFromConnection(any(UUID.class), eq(false))) - .thenReturn(nonCompletedSyncWorkspace); - - when(job.getScope()) - .thenReturn(UUID.randomUUID().toString()); - when(job.getConfigType()) - .thenReturn(ConfigType.SYNC); - - jobSubmitter.submitJob(job); - - 
verify(configRepository).writeStandardWorkspace(nonCompletedSyncWorkspace); - } - - @Test - public void testFailure() throws Exception { - doReturn(FAILED_OUTPUT).when(workerRun).call(); - - jobSubmitter.run(); - - final InOrder inOrder = inOrder(persistence, jobSubmitter); - inOrder.verify(persistence).createAttempt(JOB_ID, logPath); - inOrder.verify(persistence).failAttempt(JOB_ID, ATTEMPT_NUMBER); - verify(jobTracker).trackSync(job, JobState.FAILED); - inOrder.verifyNoMoreInteractions(); - verifyNoInteractions(configRepository); - } - - @Test - public void testException() throws Exception { - doThrow(new RuntimeException()).when(workerRun).call(); - - jobSubmitter.run(); - - final InOrder inOrder = inOrder(persistence, jobTracker); - inOrder.verify(persistence).createAttempt(JOB_ID, logPath); - inOrder.verify(persistence).failAttempt(JOB_ID, ATTEMPT_NUMBER); - inOrder.verify(jobTracker).trackSync(job, JobState.FAILED); - inOrder.verifyNoMoreInteractions(); - verifyNoInteractions(configRepository); - } - - @Test - public void testPersistenceExceptionMismatchAttemptId() throws Exception { - when(persistence.createAttempt(JOB_ID, logPath)).thenReturn(ATTEMPT_NUMBER + 1); - - jobSubmitter.run(); - - final InOrder inOrder = inOrder(persistence, jobTracker); - inOrder.verify(persistence).createAttempt(JOB_ID, logPath); - inOrder.verify(persistence).failAttempt(JOB_ID, ATTEMPT_NUMBER); - inOrder.verify(jobTracker).trackSync(job, JobState.FAILED); - inOrder.verifyNoMoreInteractions(); - } - - @Test - public void testPersistenceExceptionStart() throws Exception { - doThrow(new RuntimeException()).when(persistence).createAttempt(anyLong(), any()); - - jobSubmitter.run(); - - final InOrder inOrder = inOrder(persistence, jobTracker); - inOrder.verify(persistence).createAttempt(JOB_ID, logPath); - inOrder.verify(persistence).failAttempt(JOB_ID, ATTEMPT_NUMBER); - inOrder.verify(jobTracker).trackSync(job, JobState.FAILED); - inOrder.verifyNoMoreInteractions(); - } - - @Test - public void testPersistenceExceptionOutput() throws Exception { - doReturn(SUCCESS_OUTPUT).when(workerRun).call(); - doThrow(new RuntimeException()).when(persistence).writeOutput(anyLong(), anyInt(), any()); - - jobSubmitter.run(); - - final InOrder inOrder = inOrder(persistence, jobTracker); - inOrder.verify(persistence).createAttempt(JOB_ID, logPath); - inOrder.verify(persistence).failAttempt(JOB_ID, ATTEMPT_NUMBER); - inOrder.verify(jobTracker).trackSync(job, JobState.FAILED); - inOrder.verifyNoMoreInteractions(); - } - - @Test - void testMDC() throws Exception { - final AtomicReference> mdcMap = new AtomicReference<>(); - when(workerRun.call()).then(invocation -> { - mdcMap.set(MDC.getCopyOfContextMap()); - return SUCCESS_OUTPUT; - }); - - jobSubmitter.run(); - - verify(workerRun).call(); - - assertEquals( - ImmutableMap.of( - "job_log_path", workerRun.getJobRoot() + "/" + LogClientSingleton.LOG_FILENAME), - mdcMap.get()); - - assertTrue(MDC.getCopyOfContextMap().isEmpty()); - } - - @Nested - class OnlyOneJobIdRunning { - - /** - * See {@link JobSubmitter#attemptJobSubmit()} to understand why we need to test that only one job - * id can be successfully submited at once. - */ - @Test - public void testOnlyOneJobCanBeSubmittedAtOnce() throws Exception { - final var jobDone = new AtomicReference<>(false); - when(workerRun.call()).thenAnswer((a) -> { - Thread.sleep(5000); - jobDone.set(true); - return SUCCESS_OUTPUT; - }); - - // Simulate the same job being submitted over and over again. 
- final var simulatedJobSubmitterPool = Executors.newFixedThreadPool(10); - while (!jobDone.get()) { - // This sleep mimics our SchedulerApp loop. - Thread.sleep(1000); - simulatedJobSubmitterPool.submit(() -> { - if (!jobDone.get()) { - jobSubmitter.run(); - } - }); - } - - simulatedJobSubmitterPool.shutdownNow(); - // This is expected to be called at least once due to the various threads. - verify(persistence, atLeast(2)).getNextJob(); - // Assert that the job is actually only submitted once. - verify(jobSubmitter, Mockito.times(1)).submitJob(Mockito.any()); - } - - @Test - public void testSuccessShouldUnlockId() throws Exception { - when(workerRun.call()).thenReturn(SUCCESS_OUTPUT); - - jobSubmitter.run(); - - // This sleep mimics our SchedulerApp loop. - Thread.sleep(1000); - - // If the id was not removed, the second call would not trigger submitJob(). - jobSubmitter.run(); - - verify(persistence, Mockito.times(2)).getNextJob(); - verify(jobSubmitter, Mockito.times(2)).submitJob(Mockito.any()); - } - - @Test - public void testFailureShouldUnlockId() throws Exception { - when(workerRun.call()).thenThrow(new RuntimeException()); - - jobSubmitter.run(); - - // This sleep mimics our SchedulerApp loop. - Thread.sleep(1000); - - // If the id was not removed, the second call would not trigger submitJob(). - jobSubmitter.run(); - - verify(persistence, Mockito.times(2)).getNextJob(); - verify(jobSubmitter, Mockito.times(2)).submitJob(Mockito.any()); - } - - } - -} diff --git a/airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/ScheduleJobPredicateTest.java b/airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/ScheduleJobPredicateTest.java deleted file mode 100644 index 078c6a84910d..000000000000 --- a/airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/ScheduleJobPredicateTest.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.scheduler.app; - -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -import io.airbyte.config.Schedule; -import io.airbyte.config.StandardSync; -import io.airbyte.scheduler.models.Job; -import io.airbyte.scheduler.models.JobStatus; -import java.time.Duration; -import java.time.Instant; -import java.util.Optional; -import java.util.function.Supplier; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.EnumSource; -import org.junit.jupiter.params.provider.EnumSource.Mode; - -class ScheduleJobPredicateTest { - - private static final StandardSync STANDARD_SYNC = new StandardSync() - .withManual(false) - .withSchedule(new Schedule() - .withTimeUnit(Schedule.TimeUnit.DAYS) - .withUnits(1L)); - - private ScheduleJobPredicate scheduleJobPredicate; - private Instant now; - private Job job; - - @SuppressWarnings("unchecked") - @BeforeEach - public void setup() { - final Supplier timeSupplier = mock(Supplier.class); - scheduleJobPredicate = new ScheduleJobPredicate(timeSupplier); - job = mock(Job.class); - when(job.getId()).thenReturn(10L); - now = Instant.now(); - when(timeSupplier.get()).thenReturn(now); - } - - @Test - public void testManualSchedule() { - final StandardSync standardSync = new StandardSync().withManual(true); - assertFalse(scheduleJobPredicate.test(Optional.empty(), standardSync)); - } - - @Test - public void testNoPreviousJob() { - assertTrue(scheduleJobPredicate.test(Optional.empty(), STANDARD_SYNC)); - } - - @Test - public void testScheduleNotReady() { - when(job.getStatus()).thenReturn(JobStatus.SUCCEEDED); - when(job.getStartedAtInSecond()).thenReturn(Optional.of(now.minus(Duration.ofDays(1)).getEpochSecond())); - - assertFalse(scheduleJobPredicate.test(Optional.of(job), STANDARD_SYNC)); - } - - // use Mode.EXCLUDE so that when new values are added to the enum, these tests will fail if that - // value has not also been added to the switch statement. 
- @ParameterizedTest - @EnumSource(value = JobStatus.class, - mode = Mode.EXCLUDE, - names = {"PENDING", "RUNNING", "INCOMPLETE"}) - public void testShouldScheduleBasedOnPreviousJobStatus(final JobStatus status) { - when(job.getStatus()).thenReturn(status); - when(job.getStartedAtInSecond()).thenReturn(Optional.of(now.minus(Duration.ofDays(2)).getEpochSecond())); - - assertTrue(scheduleJobPredicate.test(Optional.of(job), STANDARD_SYNC), "job status: " + status.toString()); - } - - @ParameterizedTest - @EnumSource(value = JobStatus.class, - mode = Mode.EXCLUDE, - names = {"FAILED", "SUCCEEDED", "CANCELLED"}) - public void testScheduleShouldNotScheduleBasedOnPreviousJobStatus(final JobStatus status) { - when(job.getStatus()).thenReturn(status); - when(job.getStartedAtInSecond()).thenReturn(Optional.of(now.minus(Duration.ofDays(2)).getEpochSecond())); - - assertFalse(scheduleJobPredicate.test(Optional.of(job), STANDARD_SYNC), "job status: " + status.toString()); - } - -} diff --git a/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/DefaultSchedulerJobClient.java b/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/DefaultSchedulerJobClient.java deleted file mode 100644 index ade2389f5e10..000000000000 --- a/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/DefaultSchedulerJobClient.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2022 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.scheduler.client; - -import io.airbyte.config.ActorDefinitionResourceRequirements; -import io.airbyte.config.DestinationConnection; -import io.airbyte.config.SourceConnection; -import io.airbyte.config.StandardSync; -import io.airbyte.config.StandardSyncOperation; -import io.airbyte.scheduler.models.Job; -import io.airbyte.scheduler.persistence.JobCreator; -import io.airbyte.scheduler.persistence.JobPersistence; -import java.io.IOException; -import java.util.List; -import java.util.Optional; -import javax.annotation.Nullable; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class DefaultSchedulerJobClient implements SchedulerJobClient { - - private static final Logger LOGGER = LoggerFactory.getLogger(DefaultSchedulerJobClient.class); - - private final boolean connectorSpecificResourceDefaultsEnabled; - private final JobPersistence jobPersistence; - private final JobCreator jobCreator; - - public DefaultSchedulerJobClient(final boolean connectorSpecificResourceDefaultsEnabled, - final JobPersistence jobPersistence, - final JobCreator jobCreator) { - this.connectorSpecificResourceDefaultsEnabled = connectorSpecificResourceDefaultsEnabled; - this.jobPersistence = jobPersistence; - this.jobCreator = jobCreator; - } - - @Override - public Job createOrGetActiveSyncJob(final SourceConnection source, - final DestinationConnection destination, - final StandardSync standardSync, - final String sourceDockerImage, - final String destinationDockerImage, - final List standardSyncOperations, - @Nullable final ActorDefinitionResourceRequirements ignorableSourceResourceRequirements, - @Nullable final ActorDefinitionResourceRequirements ignorableDestinationResourceRequirements) - throws IOException { - - ActorDefinitionResourceRequirements sourceResourceRequirements = ignorableSourceResourceRequirements; - ActorDefinitionResourceRequirements destinationResourceRequirements = ignorableDestinationResourceRequirements; - - // for OSS users, make it possible to ignore default actor-level resource requirements - if 
(!connectorSpecificResourceDefaultsEnabled) { - sourceResourceRequirements = null; - destinationResourceRequirements = null; - } - - final Optional jobIdOptional = jobCreator.createSyncJob( - source, - destination, - standardSync, - sourceDockerImage, - destinationDockerImage, - standardSyncOperations, - sourceResourceRequirements, - destinationResourceRequirements); - - final long jobId = jobIdOptional.isEmpty() - ? jobPersistence.getLastReplicationJob(standardSync.getConnectionId()).orElseThrow(() -> new RuntimeException("No job available")).getId() - : jobIdOptional.get(); - - return jobPersistence.getJob(jobId); - } - - @Override - public Job createOrGetActiveResetConnectionJob(final DestinationConnection destination, - final StandardSync standardSync, - final String destinationDockerImage, - final List standardSyncOperations) - throws IOException { - final Optional jobIdOptional = - jobCreator.createResetConnectionJob(destination, standardSync, destinationDockerImage, standardSyncOperations); - - final long jobId = jobIdOptional.isEmpty() - ? jobPersistence.getLastReplicationJob(standardSync.getConnectionId()).orElseThrow(() -> new RuntimeException("No job available")).getId() - : jobIdOptional.get(); - - return jobPersistence.getJob(jobId); - } - -} diff --git a/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/EventRunner.java b/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/EventRunner.java index 408888bcc7be..c657d63a736c 100644 --- a/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/EventRunner.java +++ b/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/EventRunner.java @@ -10,7 +10,7 @@ public interface EventRunner { - void createNewSchedulerWorkflow(final UUID connectionId); + void createConnectionManagerWorkflow(final UUID connectionId); ManualOperationResult startNewManualSync(final UUID connectionId); diff --git a/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/TemporalEventRunner.java b/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/TemporalEventRunner.java index f7bbbec81c63..87e18b105114 100644 --- a/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/TemporalEventRunner.java +++ b/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/TemporalEventRunner.java @@ -15,7 +15,7 @@ public class TemporalEventRunner implements EventRunner { private final TemporalClient temporalClient; - public void createNewSchedulerWorkflow(final UUID connectionId) { + public void createConnectionManagerWorkflow(final UUID connectionId) { temporalClient.submitConnectionUpdaterAsync(connectionId); } diff --git a/airbyte-scheduler/client/src/test/java/io/airbyte/scheduler/client/DefaultSchedulerJobClientTest.java b/airbyte-scheduler/client/src/test/java/io/airbyte/scheduler/client/DefaultSchedulerJobClientTest.java deleted file mode 100644 index eb8390988571..000000000000 --- a/airbyte-scheduler/client/src/test/java/io/airbyte/scheduler/client/DefaultSchedulerJobClientTest.java +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.scheduler.client; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.spy; -import static org.mockito.Mockito.when; - -import io.airbyte.config.DestinationConnection; -import io.airbyte.config.SourceConnection; -import io.airbyte.config.StandardSync; -import io.airbyte.scheduler.models.Job; -import io.airbyte.scheduler.persistence.JobCreator; -import io.airbyte.scheduler.persistence.JobPersistence; -import java.io.IOException; -import java.util.List; -import java.util.Optional; -import java.util.UUID; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -class DefaultSchedulerJobClientTest { - - private static final long JOB_ID = 14L; - private static final String DOCKER_IMAGE = "airbyte/stardock"; - - private JobPersistence jobPersistence; - private JobCreator jobCreator; - private DefaultSchedulerJobClient client; - private Job job; - - @BeforeEach - void setup() { - jobPersistence = mock(JobPersistence.class); - jobCreator = mock(JobCreator.class); - job = mock(Job.class); - client = spy(new DefaultSchedulerJobClient(true, jobPersistence, jobCreator)); - } - - @Test - void testCreateSyncJob() throws IOException { - final SourceConnection source = mock(SourceConnection.class); - final DestinationConnection destination = mock(DestinationConnection.class); - final StandardSync standardSync = mock(StandardSync.class); - final String destinationDockerImage = "airbyte/spaceport"; - when(jobCreator.createSyncJob(source, destination, standardSync, DOCKER_IMAGE, destinationDockerImage, List.of(), null, null)) - .thenReturn(Optional.of(JOB_ID)); - when(jobPersistence.getJob(JOB_ID)).thenReturn(job); - - assertEquals( - job, - client.createOrGetActiveSyncJob(source, destination, standardSync, DOCKER_IMAGE, destinationDockerImage, List.of(), null, null)); - } - - @Test - void testCreateSyncJobAlreadyExist() throws IOException { - final SourceConnection source = mock(SourceConnection.class); - final DestinationConnection destination = mock(DestinationConnection.class); - final StandardSync standardSync = mock(StandardSync.class); - final UUID connectionUuid = UUID.randomUUID(); - when(standardSync.getConnectionId()).thenReturn(connectionUuid); - final String destinationDockerImage = "airbyte/spaceport"; - when(jobCreator.createSyncJob(source, destination, standardSync, DOCKER_IMAGE, destinationDockerImage, List.of(), null, null)) - .thenReturn(Optional.empty()); - - final Job currentJob = mock(Job.class); - when(currentJob.getId()).thenReturn(42L); - when(jobPersistence.getLastReplicationJob(connectionUuid)).thenReturn(Optional.of(currentJob)); - when(jobPersistence.getJob(42L)).thenReturn(currentJob); - - assertEquals(currentJob, client.createOrGetActiveSyncJob( - source, - destination, - standardSync, - DOCKER_IMAGE, - destinationDockerImage, - List.of(), - null, - null)); - } - - @Test - void testCreateResetConnectionJob() throws IOException { - final DestinationConnection destination = mock(DestinationConnection.class); - final StandardSync standardSync = mock(StandardSync.class); - final String destinationDockerImage = "airbyte/spaceport"; - when(jobCreator.createResetConnectionJob(destination, standardSync, destinationDockerImage, List.of())).thenReturn(Optional.of(JOB_ID)); - when(jobPersistence.getJob(JOB_ID)).thenReturn(job); - - assertEquals(job, client.createOrGetActiveResetConnectionJob(destination, standardSync, destinationDockerImage, 
List.of())); - } - - @Test - void testCreateResetConnectionJobAlreadyExist() throws IOException { - final DestinationConnection destination = mock(DestinationConnection.class); - final StandardSync standardSync = mock(StandardSync.class); - final UUID connectionUuid = UUID.randomUUID(); - when(standardSync.getConnectionId()).thenReturn(connectionUuid); - final String destinationDockerImage = "airbyte/spaceport"; - when(jobCreator.createResetConnectionJob(destination, standardSync, destinationDockerImage, List.of())).thenReturn(Optional.empty()); - - final Job currentJob = mock(Job.class); - when(currentJob.getId()).thenReturn(42L); - when(jobPersistence.getLastReplicationJob(connectionUuid)).thenReturn(Optional.of(currentJob)); - when(jobPersistence.getJob(42L)).thenReturn(currentJob); - - assertEquals(currentJob, client.createOrGetActiveResetConnectionJob(destination, standardSync, destinationDockerImage, List.of())); - } - -} diff --git a/airbyte-server/src/main/java/io/airbyte/server/ConfigurationApiFactory.java b/airbyte-server/src/main/java/io/airbyte/server/ConfigurationApiFactory.java index cd7a854e224f..091ae398ed2f 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/ConfigurationApiFactory.java +++ b/airbyte-server/src/main/java/io/airbyte/server/ConfigurationApiFactory.java @@ -16,12 +16,9 @@ import io.airbyte.config.persistence.SecretsRepositoryWriter; import io.airbyte.db.Database; import io.airbyte.scheduler.client.EventRunner; -import io.airbyte.scheduler.client.SchedulerJobClient; import io.airbyte.scheduler.client.SynchronousSchedulerClient; import io.airbyte.scheduler.persistence.JobPersistence; import io.airbyte.server.apis.ConfigurationApi; -import io.airbyte.workers.WorkerConfigs; -import io.temporal.serviceclient.WorkflowServiceStubs; import java.net.http.HttpClient; import java.nio.file.Path; import java.util.Map; @@ -31,13 +28,11 @@ public class ConfigurationApiFactory implements Factory { - private static WorkflowServiceStubs temporalService; private static ConfigRepository configRepository; private static JobPersistence jobPersistence; private static ConfigPersistence seed; private static SecretsRepositoryReader secretsRepositoryReader; private static SecretsRepositoryWriter secretsRepositoryWriter; - private static SchedulerJobClient schedulerJobClient; private static SynchronousSchedulerClient synchronousSchedulerClient; private static FileTtlManager archiveTtlManager; private static Map mdc; @@ -46,9 +41,7 @@ public class ConfigurationApiFactory implements Factory { private static TrackingClient trackingClient; private static WorkerEnvironment workerEnvironment; private static LogConfigs logConfigs; - private static WorkerConfigs workerConfigs; private static Path workspaceRoot; - private static String webappUrl; private static AirbyteVersion airbyteVersion; private static HttpClient httpClient; private static FeatureFlags featureFlags; @@ -57,13 +50,11 @@ public class ConfigurationApiFactory implements Factory { private static Flyway jobsFlyway; public static void setValues( - final WorkflowServiceStubs temporalService, final ConfigRepository configRepository, final SecretsRepositoryReader secretsRepositoryReader, final SecretsRepositoryWriter secretsRepositoryWriter, final JobPersistence jobPersistence, final ConfigPersistence seed, - final SchedulerJobClient schedulerJobClient, final SynchronousSchedulerClient synchronousSchedulerClient, final FileTtlManager archiveTtlManager, final Map mdc, @@ -72,8 +63,6 @@ public static void setValues( final 
TrackingClient trackingClient, final WorkerEnvironment workerEnvironment, final LogConfigs logConfigs, - final WorkerConfigs workerConfigs, - final String webappUrl, final AirbyteVersion airbyteVersion, final Path workspaceRoot, final HttpClient httpClient, @@ -86,19 +75,15 @@ public static void setValues( ConfigurationApiFactory.seed = seed; ConfigurationApiFactory.secretsRepositoryReader = secretsRepositoryReader; ConfigurationApiFactory.secretsRepositoryWriter = secretsRepositoryWriter; - ConfigurationApiFactory.schedulerJobClient = schedulerJobClient; ConfigurationApiFactory.synchronousSchedulerClient = synchronousSchedulerClient; ConfigurationApiFactory.archiveTtlManager = archiveTtlManager; ConfigurationApiFactory.mdc = mdc; - ConfigurationApiFactory.temporalService = temporalService; ConfigurationApiFactory.configsDatabase = configsDatabase; ConfigurationApiFactory.jobsDatabase = jobsDatabase; ConfigurationApiFactory.trackingClient = trackingClient; ConfigurationApiFactory.workerEnvironment = workerEnvironment; ConfigurationApiFactory.logConfigs = logConfigs; - ConfigurationApiFactory.workerConfigs = workerConfigs; ConfigurationApiFactory.workspaceRoot = workspaceRoot; - ConfigurationApiFactory.webappUrl = webappUrl; ConfigurationApiFactory.airbyteVersion = airbyteVersion; ConfigurationApiFactory.httpClient = httpClient; ConfigurationApiFactory.featureFlags = featureFlags; @@ -117,17 +102,13 @@ public ConfigurationApi provide() { ConfigurationApiFactory.seed, ConfigurationApiFactory.secretsRepositoryReader, ConfigurationApiFactory.secretsRepositoryWriter, - ConfigurationApiFactory.schedulerJobClient, ConfigurationApiFactory.synchronousSchedulerClient, ConfigurationApiFactory.archiveTtlManager, - ConfigurationApiFactory.temporalService, ConfigurationApiFactory.configsDatabase, ConfigurationApiFactory.jobsDatabase, ConfigurationApiFactory.trackingClient, ConfigurationApiFactory.workerEnvironment, ConfigurationApiFactory.logConfigs, - ConfigurationApiFactory.workerConfigs, - ConfigurationApiFactory.webappUrl, ConfigurationApiFactory.airbyteVersion, ConfigurationApiFactory.workspaceRoot, ConfigurationApiFactory.httpClient, diff --git a/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java b/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java index a5c7b791b65b..1f5bdb2b97fe 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java +++ b/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java @@ -35,12 +35,9 @@ import io.airbyte.db.factory.FlywayFactory; import io.airbyte.db.instance.configs.ConfigsDatabaseMigrator; import io.airbyte.db.instance.jobs.JobsDatabaseMigrator; -import io.airbyte.scheduler.client.DefaultSchedulerJobClient; import io.airbyte.scheduler.client.DefaultSynchronousSchedulerClient; import io.airbyte.scheduler.client.EventRunner; -import io.airbyte.scheduler.client.SchedulerJobClient; import io.airbyte.scheduler.client.TemporalEventRunner; -import io.airbyte.scheduler.persistence.DefaultJobCreator; import io.airbyte.scheduler.persistence.DefaultJobPersistence; import io.airbyte.scheduler.persistence.JobPersistence; import io.airbyte.scheduler.persistence.job_factory.OAuthConfigSupplier; @@ -53,10 +50,7 @@ import io.airbyte.server.errors.UncaughtExceptionMapper; import io.airbyte.server.handlers.DbMigrationHandler; import io.airbyte.validation.json.JsonValidationException; -import io.airbyte.workers.WorkerConfigs; import io.airbyte.workers.temporal.TemporalClient; -import io.airbyte.workers.temporal.TemporalUtils; -import 
io.temporal.serviceclient.WorkflowServiceStubs; import java.io.IOException; import java.net.http.HttpClient; import java.util.Map; @@ -158,8 +152,6 @@ public static ServerRunnable getServer(final ServerFactory apiFactory, final DSLContext jobsDslContext, final Flyway jobsFlyway) throws Exception { - final WorkerConfigs workerConfigs = new WorkerConfigs(configs); - LogClientSingleton.getInstance().setWorkspaceMdc( configs.getWorkerEnvironment(), configs.getLogConfigs(), @@ -201,14 +193,8 @@ public static ServerRunnable getServer(final ServerFactory apiFactory, final TrackingClient trackingClient = TrackingClientSingleton.get(); final JobTracker jobTracker = new JobTracker(configRepository, jobPersistence, trackingClient); - final WorkflowServiceStubs temporalService = TemporalUtils.createTemporalService(configs.getTemporalHost()); final TemporalClient temporalClient = TemporalClient.production(configs.getTemporalHost(), configs.getWorkspaceRoot(), configs); final OAuthConfigSupplier oAuthConfigSupplier = new OAuthConfigSupplier(configRepository, trackingClient); - final SchedulerJobClient schedulerJobClient = - new DefaultSchedulerJobClient( - configs.connectorSpecificResourceDefaultsEnabled(), - jobPersistence, - new DefaultJobCreator(jobPersistence, configRepository, workerConfigs.getResourceRequirements())); final DefaultSynchronousSchedulerClient syncSchedulerClient = new DefaultSynchronousSchedulerClient(temporalClient, jobTracker, oAuthConfigSupplier); final HttpClient httpClient = HttpClient.newBuilder().version(HttpClient.Version.HTTP_1_1).build(); @@ -226,9 +212,7 @@ public static ServerRunnable getServer(final ServerFactory apiFactory, LOGGER.info("Starting server..."); return apiFactory.create( - schedulerJobClient, syncSchedulerClient, - temporalService, configRepository, secretsRepositoryReader, secretsRepositoryWriter, @@ -239,8 +223,6 @@ public static ServerRunnable getServer(final ServerFactory apiFactory, trackingClient, configs.getWorkerEnvironment(), configs.getLogConfigs(), - workerConfigs, - configs.getWebappUrl(), configs.getAirbyteVersion(), configs.getWorkspaceRoot(), httpClient, diff --git a/airbyte-server/src/main/java/io/airbyte/server/ServerFactory.java b/airbyte-server/src/main/java/io/airbyte/server/ServerFactory.java index e8c69fad10a5..3221b565dbb5 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/ServerFactory.java +++ b/airbyte-server/src/main/java/io/airbyte/server/ServerFactory.java @@ -16,12 +16,9 @@ import io.airbyte.config.persistence.SecretsRepositoryWriter; import io.airbyte.db.Database; import io.airbyte.scheduler.client.EventRunner; -import io.airbyte.scheduler.client.SchedulerJobClient; import io.airbyte.scheduler.client.SynchronousSchedulerClient; import io.airbyte.scheduler.persistence.JobPersistence; import io.airbyte.server.apis.ConfigurationApi; -import io.airbyte.workers.WorkerConfigs; -import io.temporal.serviceclient.WorkflowServiceStubs; import java.net.http.HttpClient; import java.nio.file.Path; import java.util.Set; @@ -31,9 +28,7 @@ public interface ServerFactory { - ServerRunnable create(SchedulerJobClient schedulerJobClient, - SynchronousSchedulerClient cachingSchedulerClient, - WorkflowServiceStubs temporalService, + ServerRunnable create(SynchronousSchedulerClient cachingSchedulerClient, ConfigRepository configRepository, SecretsRepositoryReader secretsRepositoryReader, SecretsRepositoryWriter secretsRepositoryWriter, @@ -44,8 +39,6 @@ ServerRunnable create(SchedulerJobClient schedulerJobClient, TrackingClient 
trackingClient, WorkerEnvironment workerEnvironment, LogConfigs logConfigs, - WorkerConfigs workerConfigs, - String webappUrl, AirbyteVersion airbyteVersion, Path workspaceRoot, HttpClient httpClient, @@ -57,9 +50,7 @@ ServerRunnable create(SchedulerJobClient schedulerJobClient, class Api implements ServerFactory { @Override - public ServerRunnable create(final SchedulerJobClient schedulerJobClient, - final SynchronousSchedulerClient synchronousSchedulerClient, - final WorkflowServiceStubs temporalService, + public ServerRunnable create(final SynchronousSchedulerClient synchronousSchedulerClient, final ConfigRepository configRepository, final SecretsRepositoryReader secretsRepositoryReader, final SecretsRepositoryWriter secretsRepositoryWriter, @@ -70,8 +61,6 @@ public ServerRunnable create(final SchedulerJobClient schedulerJobClient, final TrackingClient trackingClient, final WorkerEnvironment workerEnvironment, final LogConfigs logConfigs, - final WorkerConfigs workerConfigs, - final String webappUrl, final AirbyteVersion airbyteVersion, final Path workspaceRoot, final HttpClient httpClient, @@ -81,13 +70,11 @@ public ServerRunnable create(final SchedulerJobClient schedulerJobClient, final Flyway jobsFlyway) { // set static values for factory ConfigurationApiFactory.setValues( - temporalService, configRepository, secretsRepositoryReader, secretsRepositoryWriter, jobPersistence, seed, - schedulerJobClient, synchronousSchedulerClient, new FileTtlManager(10, TimeUnit.MINUTES, 10), MDC.getCopyOfContextMap(), @@ -96,8 +83,6 @@ public ServerRunnable create(final SchedulerJobClient schedulerJobClient, trackingClient, workerEnvironment, logConfigs, - workerConfigs, - webappUrl, airbyteVersion, workspaceRoot, httpClient, diff --git a/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java b/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java index 20d9f0126d3c..da634bd9e41d 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java +++ b/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java @@ -108,12 +108,9 @@ import io.airbyte.config.persistence.SecretsRepositoryWriter; import io.airbyte.db.Database; import io.airbyte.scheduler.client.EventRunner; -import io.airbyte.scheduler.client.SchedulerJobClient; import io.airbyte.scheduler.client.SynchronousSchedulerClient; -import io.airbyte.scheduler.persistence.JobNotifier; import io.airbyte.scheduler.persistence.JobPersistence; import io.airbyte.scheduler.persistence.WorkspaceHelper; -import io.airbyte.scheduler.persistence.job_factory.OAuthConfigSupplier; import io.airbyte.server.errors.BadObjectSchemaKnownException; import io.airbyte.server.errors.IdNotFoundKnownException; import io.airbyte.server.handlers.ArchiveHandler; @@ -134,8 +131,6 @@ import io.airbyte.server.handlers.WorkspacesHandler; import io.airbyte.validation.json.JsonSchemaValidator; import io.airbyte.validation.json.JsonValidationException; -import io.airbyte.workers.WorkerConfigs; -import io.temporal.serviceclient.WorkflowServiceStubs; import java.io.File; import java.io.IOException; import java.net.http.HttpClient; @@ -171,17 +166,13 @@ public ConfigurationApi(final ConfigRepository configRepository, final ConfigPersistence seed, final SecretsRepositoryReader secretsRepositoryReader, final SecretsRepositoryWriter secretsRepositoryWriter, - final SchedulerJobClient schedulerJobClient, final SynchronousSchedulerClient synchronousSchedulerClient, final FileTtlManager archiveTtlManager, - final 
WorkflowServiceStubs temporalService,
                           final Database configsDatabase,
                           final Database jobsDatabase,
                           final TrackingClient trackingClient,
                           final WorkerEnvironment workerEnvironment,
                           final LogConfigs logConfigs,
-                          final WorkerConfigs workerConfigs,
-                          final String webappUrl,
                           final AirbyteVersion airbyteVersion,
                           final Path workspaceRoot,
                           final HttpClient httpClient,
@@ -194,11 +185,6 @@ public ConfigurationApi(final ConfigRepository configRepository,
     this.workspaceRoot = workspaceRoot;
     final JsonSchemaValidator schemaValidator = new JsonSchemaValidator();
-    final JobNotifier jobNotifier = new JobNotifier(
-        webappUrl,
-        configRepository,
-        new WorkspaceHelper(configRepository, jobPersistence),
-        trackingClient);
     final WorkspaceHelper workspaceHelper = new WorkspaceHelper(configRepository, jobPersistence);
@@ -206,20 +192,17 @@ public ConfigurationApi(final ConfigRepository configRepository,
         configRepository,
         secretsRepositoryReader,
         secretsRepositoryWriter,
-        schedulerJobClient,
         synchronousSchedulerClient,
         jobPersistence,
-        jobNotifier,
-        temporalService,
-        new OAuthConfigSupplier(configRepository, trackingClient),
-        workerEnvironment,
-        logConfigs,
-        eventRunner,
-        featureFlags);
+        workerEnvironment,
+        logConfigs,
+        eventRunner);
     connectionsHandler = new ConnectionsHandler(
         configRepository,
         workspaceHelper,
         trackingClient,
-        eventRunner,
-        featureFlags,
-        workerConfigs);
+        eventRunner);
     sourceHandler = new SourceHandler(
         configRepository,
         secretsRepositoryReader,
diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/ConnectionsHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/ConnectionsHandler.java
index b053cdfa1546..5e46b7cce9eb 100644
--- a/airbyte-server/src/main/java/io/airbyte/server/handlers/ConnectionsHandler.java
+++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/ConnectionsHandler.java
@@ -14,7 +14,6 @@
 import io.airbyte.api.model.generated.ConnectionRead;
 import io.airbyte.api.model.generated.ConnectionReadList;
 import io.airbyte.api.model.generated.ConnectionSearch;
-import io.airbyte.api.model.generated.ConnectionStatus;
 import io.airbyte.api.model.generated.ConnectionUpdate;
 import io.airbyte.api.model.generated.DestinationRead;
 import io.airbyte.api.model.generated.DestinationSearch;
@@ -22,7 +21,6 @@
 import io.airbyte.api.model.generated.SourceSearch;
 import io.airbyte.api.model.generated.WorkspaceIdRequestBody;
 import io.airbyte.commons.enums.Enums;
-import io.airbyte.commons.features.FeatureFlags;
 import io.airbyte.commons.json.Jsons;
 import io.airbyte.config.ActorCatalog;
 import io.airbyte.config.DestinationConnection;
@@ -44,7 +42,6 @@
 import io.airbyte.server.handlers.helpers.DestinationMatcher;
 import io.airbyte.server.handlers.helpers.SourceMatcher;
 import io.airbyte.validation.json.JsonValidationException;
-import io.airbyte.workers.WorkerConfigs;
 import io.airbyte.workers.helper.ConnectionHelper;
 import java.io.IOException;
 import java.util.Collections;
@@ -66,39 +63,29 @@ public class ConnectionsHandler {
   private final WorkspaceHelper workspaceHelper;
   private final TrackingClient trackingClient;
   private final EventRunner eventRunner;
-  private final FeatureFlags featureFlags;
-  private final WorkerConfigs workerConfigs;
   @VisibleForTesting
   ConnectionsHandler(final ConfigRepository configRepository,
                      final Supplier<UUID> uuidGenerator,
                      final WorkspaceHelper workspaceHelper,
                      final TrackingClient trackingClient,
-                     final EventRunner eventRunner,
-                     final FeatureFlags featureFlags,
-                     final WorkerConfigs workerConfigs) {
+                     final EventRunner
eventRunner) { this.configRepository = configRepository; this.uuidGenerator = uuidGenerator; this.workspaceHelper = workspaceHelper; this.trackingClient = trackingClient; this.eventRunner = eventRunner; - this.featureFlags = featureFlags; - this.workerConfigs = workerConfigs; } public ConnectionsHandler(final ConfigRepository configRepository, final WorkspaceHelper workspaceHelper, final TrackingClient trackingClient, - final EventRunner eventRunner, - final FeatureFlags featureFlags, - final WorkerConfigs workerConfigs) { + final EventRunner eventRunner) { this(configRepository, UUID::randomUUID, workspaceHelper, trackingClient, - eventRunner, - featureFlags, - workerConfigs); + eventRunner); } @@ -157,15 +144,13 @@ public ConnectionRead createConnection(final ConnectionCreate connectionCreate) trackNewConnection(standardSync); - if (featureFlags.usesNewScheduler()) { - try { - LOGGER.info("Starting a connection using the new scheduler"); - eventRunner.createNewSchedulerWorkflow(connectionId); - } catch (final Exception e) { - LOGGER.error("Start of the temporal connection manager workflow failed", e); - configRepository.deleteStandardSyncDefinition(standardSync.getConnectionId()); - throw e; - } + try { + LOGGER.info("Starting a connection manager workflow"); + eventRunner.createConnectionManagerWorkflow(connectionId); + } catch (final Exception e) { + LOGGER.error("Start of the connection manager workflow failed", e); + configRepository.deleteStandardSyncDefinition(standardSync.getConnectionId()); + throw e; } return buildConnectionRead(connectionId); @@ -223,9 +208,7 @@ public ConnectionRead updateConnection(final ConnectionUpdate connectionUpdate) configRepository.writeStandardSync(newConnection); - if (featureFlags.usesNewScheduler()) { - eventRunner.update(connectionUpdate.getConnectionId()); - } + eventRunner.update(connectionUpdate.getConnectionId()); return buildConnectionRead(connectionUpdate.getConnectionId()); } @@ -337,36 +320,8 @@ public boolean matchSearch(final DestinationSearch destinationSearch, final Dest return (destinationReadFromSearch == null || destinationReadFromSearch.equals(destinationRead)); } - public void deleteConnection(final UUID connectionId) - throws ConfigNotFoundException, IOException, JsonValidationException { - if (featureFlags.usesNewScheduler()) { - // todo (cgardens) - need an interface over this. 
- eventRunner.deleteConnection(connectionId); - } else { - final ConnectionRead connectionRead = getConnection(connectionId); - deleteConnection(connectionRead); - } - } - - public void deleteConnection(final ConnectionRead connectionRead) throws ConfigNotFoundException, IOException, JsonValidationException { - final ConnectionUpdate connectionUpdate = new ConnectionUpdate() - .namespaceDefinition(connectionRead.getNamespaceDefinition()) - .namespaceFormat(connectionRead.getNamespaceFormat()) - .prefix(connectionRead.getPrefix()) - .connectionId(connectionRead.getConnectionId()) - .operationIds(connectionRead.getOperationIds()) - .syncCatalog(connectionRead.getSyncCatalog()) - .schedule(connectionRead.getSchedule()) - .status(ConnectionStatus.DEPRECATED) - .resourceRequirements(connectionRead.getResourceRequirements()); - - updateConnection(connectionUpdate); - } - - private boolean isStandardSyncInWorkspace(final UUID workspaceId, - final StandardSync standardSync) - throws ConfigNotFoundException, IOException, JsonValidationException { - return configRepository.getSourceConnection(standardSync.getSourceId()).getWorkspaceId().equals(workspaceId); + public void deleteConnection(final UUID connectionId) { + eventRunner.deleteConnection(connectionId); } private ConnectionRead buildConnectionRead(final UUID connectionId) diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/SchedulerHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/SchedulerHandler.java index 413f91c0e72b..16250c869c23 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/SchedulerHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/SchedulerHandler.java @@ -4,10 +4,8 @@ package io.airbyte.server.handlers; -import com.fasterxml.jackson.databind.JsonNode; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Charsets; -import com.google.common.collect.Lists; import com.google.common.hash.HashFunction; import com.google.common.hash.Hashing; import io.airbyte.api.model.generated.AdvancedAuth; @@ -36,7 +34,6 @@ import io.airbyte.api.model.generated.SynchronousJobRead; import io.airbyte.commons.docker.DockerUtils; import io.airbyte.commons.enums.Enums; -import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.json.Jsons; import io.airbyte.config.ActorCatalog; import io.airbyte.config.Configs.WorkerEnvironment; @@ -46,8 +43,6 @@ import io.airbyte.config.StandardCheckConnectionOutput; import io.airbyte.config.StandardDestinationDefinition; import io.airbyte.config.StandardSourceDefinition; -import io.airbyte.config.StandardSync; -import io.airbyte.config.StandardSyncOperation; import io.airbyte.config.State; import io.airbyte.config.helpers.LogConfigs; import io.airbyte.config.persistence.ConfigNotFoundException; @@ -57,14 +52,11 @@ import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.ConnectorSpecification; import io.airbyte.scheduler.client.EventRunner; -import io.airbyte.scheduler.client.SchedulerJobClient; import io.airbyte.scheduler.client.SynchronousJobMetadata; import io.airbyte.scheduler.client.SynchronousResponse; import io.airbyte.scheduler.client.SynchronousSchedulerClient; import io.airbyte.scheduler.models.Job; -import io.airbyte.scheduler.persistence.JobNotifier; import io.airbyte.scheduler.persistence.JobPersistence; -import io.airbyte.scheduler.persistence.job_factory.OAuthConfigSupplier; import io.airbyte.server.converters.ConfigurationUpdate; import 
io.airbyte.server.converters.JobConverter; import io.airbyte.server.converters.OauthModelConverter; @@ -72,13 +64,8 @@ import io.airbyte.validation.json.JsonSchemaValidator; import io.airbyte.validation.json.JsonValidationException; import io.airbyte.workers.temporal.TemporalClient.ManualOperationResult; -import io.airbyte.workers.temporal.TemporalUtils; -import io.temporal.api.common.v1.WorkflowExecution; -import io.temporal.api.workflowservice.v1.RequestCancelWorkflowExecutionRequest; -import io.temporal.serviceclient.WorkflowServiceStubs; import java.io.IOException; import java.util.ArrayList; -import java.util.List; import java.util.Optional; import java.util.UUID; import org.slf4j.Logger; @@ -91,83 +78,48 @@ public class SchedulerHandler { private final ConfigRepository configRepository; private final SecretsRepositoryWriter secretsRepositoryWriter; - private final SchedulerJobClient schedulerJobClient; private final SynchronousSchedulerClient synchronousSchedulerClient; private final ConfigurationUpdate configurationUpdate; private final JsonSchemaValidator jsonSchemaValidator; private final JobPersistence jobPersistence; - private final JobNotifier jobNotifier; - private final WorkflowServiceStubs temporalService; - private final OAuthConfigSupplier oAuthConfigSupplier; private final JobConverter jobConverter; - private final WorkerEnvironment workerEnvironment; - private final LogConfigs logConfigs; private final EventRunner eventRunner; - private final FeatureFlags featureFlags; public SchedulerHandler(final ConfigRepository configRepository, final SecretsRepositoryReader secretsRepositoryReader, final SecretsRepositoryWriter secretsRepositoryWriter, - final SchedulerJobClient schedulerJobClient, final SynchronousSchedulerClient synchronousSchedulerClient, final JobPersistence jobPersistence, - final JobNotifier jobNotifier, - final WorkflowServiceStubs temporalService, - final OAuthConfigSupplier oAuthConfigSupplier, final WorkerEnvironment workerEnvironment, final LogConfigs logConfigs, - final EventRunner eventRunner, - final FeatureFlags featureFlags) { + final EventRunner eventRunner) { this( configRepository, secretsRepositoryWriter, - secretsRepositoryReader, - schedulerJobClient, synchronousSchedulerClient, new ConfigurationUpdate(configRepository, secretsRepositoryReader), new JsonSchemaValidator(), jobPersistence, - jobNotifier, - temporalService, - oAuthConfigSupplier, - workerEnvironment, - logConfigs, eventRunner, - featureFlags, new JobConverter(workerEnvironment, logConfigs)); } @VisibleForTesting SchedulerHandler(final ConfigRepository configRepository, final SecretsRepositoryWriter secretsRepositoryWriter, - final SecretsRepositoryReader secretsRepositoryReader, - final SchedulerJobClient schedulerJobClient, final SynchronousSchedulerClient synchronousSchedulerClient, final ConfigurationUpdate configurationUpdate, final JsonSchemaValidator jsonSchemaValidator, final JobPersistence jobPersistence, - final JobNotifier jobNotifier, - final WorkflowServiceStubs temporalService, - final OAuthConfigSupplier oAuthConfigSupplier, - final WorkerEnvironment workerEnvironment, - final LogConfigs logConfigs, final EventRunner eventRunner, - final FeatureFlags featureFlags, final JobConverter jobConverter) { this.configRepository = configRepository; this.secretsRepositoryWriter = secretsRepositoryWriter; - this.schedulerJobClient = schedulerJobClient; this.synchronousSchedulerClient = synchronousSchedulerClient; this.configurationUpdate = configurationUpdate; 
this.jsonSchemaValidator = jsonSchemaValidator; this.jobPersistence = jobPersistence; - this.jobNotifier = jobNotifier; - this.temporalService = temporalService; - this.oAuthConfigSupplier = oAuthConfigSupplier; - this.workerEnvironment = workerEnvironment; - this.logConfigs = logConfigs; this.eventRunner = eventRunner; - this.featureFlags = featureFlags; this.jobConverter = jobConverter; } @@ -361,75 +313,12 @@ public DestinationDefinitionSpecificationRead getDestinationSpecification( return specRead; } - public JobInfoRead syncConnection(final ConnectionIdRequestBody connectionIdRequestBody) - throws ConfigNotFoundException, IOException, JsonValidationException { - if (featureFlags.usesNewScheduler()) { - return submitManualSyncToWorker(connectionIdRequestBody.getConnectionId()); - } - final UUID connectionId = connectionIdRequestBody.getConnectionId(); - final StandardSync standardSync = configRepository.getStandardSync(connectionId); - - final SourceConnection sourceConnection = configRepository.getSourceConnection(standardSync.getSourceId()); - final DestinationConnection destinationConnection = configRepository.getDestinationConnection(standardSync.getDestinationId()); - final JsonNode sourceConfiguration = oAuthConfigSupplier.injectSourceOAuthParameters( - sourceConnection.getSourceDefinitionId(), - sourceConnection.getWorkspaceId(), - sourceConnection.getConfiguration()); - sourceConnection.withConfiguration(sourceConfiguration); - final JsonNode destinationConfiguration = oAuthConfigSupplier.injectDestinationOAuthParameters( - destinationConnection.getDestinationDefinitionId(), - destinationConnection.getWorkspaceId(), - destinationConnection.getConfiguration()); - destinationConnection.withConfiguration(destinationConfiguration); - - final StandardSourceDefinition sourceDef = configRepository.getStandardSourceDefinition(sourceConnection.getSourceDefinitionId()); - final String sourceImageName = DockerUtils.getTaggedImageName(sourceDef.getDockerRepository(), sourceDef.getDockerImageTag()); - - final StandardDestinationDefinition destinationDef = - configRepository.getStandardDestinationDefinition(destinationConnection.getDestinationDefinitionId()); - final String destinationImageName = DockerUtils.getTaggedImageName(destinationDef.getDockerRepository(), destinationDef.getDockerImageTag()); - - final List<StandardSyncOperation> standardSyncOperations = Lists.newArrayList(); - for (final var operationId : standardSync.getOperationIds()) { - final StandardSyncOperation standardSyncOperation = configRepository.getStandardSyncOperation(operationId); - standardSyncOperations.add(standardSyncOperation); - } - - final Job job = schedulerJobClient.createOrGetActiveSyncJob( - sourceConnection, - destinationConnection, - standardSync, - sourceImageName, - destinationImageName, - standardSyncOperations, - sourceDef.getResourceRequirements(), - destinationDef.getResourceRequirements()); - - return jobConverter.getJobInfoRead(job); + public JobInfoRead syncConnection(final ConnectionIdRequestBody connectionIdRequestBody) throws IOException { + return submitManualSyncToWorker(connectionIdRequestBody.getConnectionId()); } - public JobInfoRead resetConnection(final ConnectionIdRequestBody connectionIdRequestBody) - throws IOException, JsonValidationException, ConfigNotFoundException { - if (featureFlags.usesNewScheduler()) { - return submitResetConnectionToWorker(connectionIdRequestBody.getConnectionId()); - } - final UUID connectionId = connectionIdRequestBody.getConnectionId(); - final StandardSync standardSync =
configRepository.getStandardSync(connectionId); - - final DestinationConnection destination = configRepository.getDestinationConnection(standardSync.getDestinationId()); - - final StandardDestinationDefinition destinationDef = configRepository.getStandardDestinationDefinition(destination.getDestinationDefinitionId()); - final String destinationImageName = DockerUtils.getTaggedImageName(destinationDef.getDockerRepository(), destinationDef.getDockerImageTag()); - - final List<StandardSyncOperation> standardSyncOperations = Lists.newArrayList(); - for (final var operationId : standardSync.getOperationIds()) { - final StandardSyncOperation standardSyncOperation = configRepository.getStandardSyncOperation(operationId); - standardSyncOperations.add(standardSyncOperation); - } - - final Job job = schedulerJobClient.createOrGetActiveResetConnectionJob(destination, standardSync, destinationImageName, standardSyncOperations); - - return jobConverter.getJobInfoRead(job); + public JobInfoRead resetConnection(final ConnectionIdRequestBody connectionIdRequestBody) throws IOException { + return submitResetConnectionToWorker(connectionIdRequestBody.getConnectionId()); } public ConnectionState getState(final ConnectionIdRequestBody connectionIdRequestBody) throws IOException { @@ -444,40 +333,8 @@ public ConnectionState getState(final ConnectionIdRequestBody connectionIdReques return connectionState; } - // todo (cgardens) - this method needs a test. public JobInfoRead cancelJob(final JobIdRequestBody jobIdRequestBody) throws IOException { - if (featureFlags.usesNewScheduler()) { - return submitCancellationToWorker(jobIdRequestBody.getId()); - } - - final long jobId = jobIdRequestBody.getId(); - - // prevent this job from being scheduled again - jobPersistence.cancelJob(jobId); - cancelTemporalWorkflowIfPresent(jobId); - - final Job job = jobPersistence.getJob(jobId); - jobNotifier.failJob("job was cancelled", job); - return jobConverter.getJobInfoRead(job); - } - - private void cancelTemporalWorkflowIfPresent(final long jobId) throws IOException { - // attempts ids are monotonically increasing starting from 0 and specific to a job id, allowing us - // to do this.
- final var latestAttemptId = jobPersistence.getJob(jobId).getAttempts().size() - 1; - final var workflowId = jobPersistence.getAttemptTemporalWorkflowId(jobId, latestAttemptId); - - if (workflowId.isPresent()) { - LOGGER.info("Cancelling workflow: {}", workflowId); - final WorkflowExecution workflowExecution = WorkflowExecution.newBuilder() - .setWorkflowId(workflowId.get()) - .build(); - final RequestCancelWorkflowExecutionRequest cancelRequest = RequestCancelWorkflowExecutionRequest.newBuilder() - .setWorkflowExecution(workflowExecution) - .setNamespace(TemporalUtils.DEFAULT_NAMESPACE) - .build(); - temporalService.blockingStub().requestCancelWorkflowExecution(cancelRequest); - } + return submitCancellationToWorker(jobIdRequestBody.getId()); } private CheckConnectionRead reportConnectionStatus(final SynchronousResponse<StandardCheckConnectionOutput> response) { diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/WebBackendConnectionsHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/WebBackendConnectionsHandler.java index d10e18e36fdd..0ef9a8916d3e 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/WebBackendConnectionsHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/WebBackendConnectionsHandler.java @@ -286,28 +286,13 @@ public WebBackendConnectionRead webBackendUpdateConnection(final WebBackendConne ConnectionRead connectionRead; final boolean needReset = MoreBooleans.isTruthy(webBackendConnectionUpdate.getWithRefreshedCatalog()); - if (!featureFlags.usesNewScheduler()) { - connectionRead = connectionsHandler.updateConnection(connectionUpdate); - if (needReset) { - final ConnectionIdRequestBody connectionId = new ConnectionIdRequestBody().connectionId(webBackendConnectionUpdate.getConnectionId()); - // wait for this to execute - schedulerHandler.resetConnection(connectionId); - - // just create the job - schedulerHandler.syncConnection(connectionId); - } - } else { - connectionRead = connectionsHandler.updateConnection(connectionUpdate); - if (needReset) { + connectionRead = connectionsHandler.updateConnection(connectionUpdate); - // todo (cgardens) - temporalWorkerRunFactory CANNOT be here. - eventRunner.synchronousResetConnection(webBackendConnectionUpdate.getConnectionId()); - - // todo (cgardens) - temporalWorkerRunFactory CANNOT be here.
- eventRunner.startNewManualSync(webBackendConnectionUpdate.getConnectionId()); - connectionRead = connectionsHandler.getConnection(connectionUpdate.getConnectionId()); - } + if (needReset) { + eventRunner.synchronousResetConnection(webBackendConnectionUpdate.getConnectionId()); + eventRunner.startNewManualSync(webBackendConnectionUpdate.getConnectionId()); + connectionRead = connectionsHandler.getConnection(connectionUpdate.getConnectionId()); } return buildWebBackendConnectionRead(connectionRead); diff --git a/airbyte-server/src/test/java/io/airbyte/server/apis/ConfigurationApiTest.java b/airbyte-server/src/test/java/io/airbyte/server/apis/ConfigurationApiTest.java index 38d8019d5e18..ba9a62928299 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/apis/ConfigurationApiTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/apis/ConfigurationApiTest.java @@ -21,11 +21,8 @@ import io.airbyte.config.persistence.SecretsRepositoryWriter; import io.airbyte.db.Database; import io.airbyte.scheduler.client.EventRunner; -import io.airbyte.scheduler.client.SchedulerJobClient; import io.airbyte.scheduler.client.SynchronousSchedulerClient; import io.airbyte.scheduler.persistence.JobPersistence; -import io.airbyte.workers.WorkerConfigs; -import io.temporal.serviceclient.WorkflowServiceStubs; import java.net.http.HttpClient; import java.nio.file.Path; import org.flywaydb.core.Flyway; @@ -45,17 +42,13 @@ void testImportDefinitions() { mock(ConfigPersistence.class), mock(SecretsRepositoryReader.class), mock(SecretsRepositoryWriter.class), - mock(SchedulerJobClient.class), mock(SynchronousSchedulerClient.class), mock(FileTtlManager.class), - mock(WorkflowServiceStubs.class), mock(Database.class), mock(Database.class), mock(TrackingClient.class), WorkerEnvironment.DOCKER, LogConfigs.EMPTY, - mock(WorkerConfigs.class), - "http://localhost", new AirbyteVersion("0.1.0-alpha"), Path.of(""), mock(HttpClient.class), diff --git a/airbyte-server/src/test/java/io/airbyte/server/handlers/ConnectionsHandlerTest.java b/airbyte-server/src/test/java/io/airbyte/server/handlers/ConnectionsHandlerTest.java index 594bd707fe40..5a4128595e35 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/handlers/ConnectionsHandlerTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/handlers/ConnectionsHandlerTest.java @@ -7,9 +7,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.spy; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -30,12 +28,10 @@ import io.airbyte.api.model.generated.SyncMode; import io.airbyte.api.model.generated.WorkspaceIdRequestBody; import io.airbyte.commons.enums.Enums; -import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.json.Jsons; import io.airbyte.config.ConfigSchema; import io.airbyte.config.DataType; import io.airbyte.config.DestinationConnection; -import io.airbyte.config.EnvConfigs; import io.airbyte.config.JobSyncConfig; import io.airbyte.config.Schedule; import io.airbyte.config.SourceConnection; @@ -43,19 +39,15 @@ import io.airbyte.config.StandardSourceDefinition; import io.airbyte.config.StandardSync; import io.airbyte.config.StandardSyncOperation; -import io.airbyte.config.helpers.LogConfigs; import io.airbyte.config.persistence.ConfigNotFoundException; 
import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import io.airbyte.scheduler.client.EventRunner; -import io.airbyte.scheduler.persistence.JobPersistence; import io.airbyte.scheduler.persistence.WorkspaceHelper; -import io.airbyte.scheduler.persistence.job_factory.SyncJobFactory; import io.airbyte.server.converters.ApiPojoConverters; import io.airbyte.server.handlers.helpers.CatalogConverter; import io.airbyte.server.helpers.ConnectionHelpers; import io.airbyte.validation.json.JsonValidationException; -import io.airbyte.workers.WorkerConfigs; import java.io.IOException; import java.util.Collections; import java.util.List; @@ -65,15 +57,12 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ValueSource; class ConnectionsHandlerTest { private ConfigRepository configRepository; private Supplier<UUID> uuidGenerator; - private WorkerConfigs workerConfigs; private ConnectionsHandler connectionsHandler; private UUID workspaceId; private UUID sourceDefinitionId; @@ -92,15 +81,10 @@ class ConnectionsHandlerTest { private WorkspaceHelper workspaceHelper; private TrackingClient trackingClient; private EventRunner eventRunner; - private SyncJobFactory jobFactory; - private JobPersistence jobPersistence; - private LogConfigs logConfigs; - private FeatureFlags featureFlags; @SuppressWarnings("unchecked") @BeforeEach void setUp() throws IOException, JsonValidationException, ConfigNotFoundException { - workerConfigs = new WorkerConfigs(new EnvConfigs()); workspaceId = UUID.randomUUID(); sourceDefinitionId = UUID.randomUUID(); @@ -155,15 +139,12 @@ void setUp() throws IOException, JsonValidationException, ConfigNotFoundExceptio uuidGenerator = mock(Supplier.class); workspaceHelper = mock(WorkspaceHelper.class); trackingClient = mock(TrackingClient.class); - featureFlags = mock(FeatureFlags.class); eventRunner = mock(EventRunner.class); when(workspaceHelper.getWorkspaceForSourceIdIgnoreExceptions(sourceId)).thenReturn(workspaceId); when(workspaceHelper.getWorkspaceForSourceIdIgnoreExceptions(deletedSourceId)).thenReturn(workspaceId); when(workspaceHelper.getWorkspaceForDestinationIdIgnoreExceptions(destinationId)).thenReturn(workspaceId); when(workspaceHelper.getWorkspaceForOperationIdIgnoreExceptions(operationId)).thenReturn(workspaceId); - - when(featureFlags.usesNewScheduler()).thenReturn(false); } @Nested @@ -176,9 +157,7 @@ void setUp() { uuidGenerator, workspaceHelper, trackingClient, - eventRunner, - featureFlags, - workerConfigs); + eventRunner); } @Test @@ -311,12 +290,8 @@ void testCreateConnectionWithBadDefinitionIds() throws JsonValidationException, } - @ParameterizedTest - @ValueSource(booleans = {true, false}) - void testUpdateConnection(final boolean useNewScheduler) throws JsonValidationException, ConfigNotFoundException, IOException { - when(featureFlags.usesNewScheduler()) - .thenReturn(useNewScheduler); - + @Test + void testUpdateConnection() throws JsonValidationException, ConfigNotFoundException, IOException { final AirbyteCatalog catalog = ConnectionHelpers.generateBasicApiCatalog(); catalog.getStreams().get(0).getStream().setName("azkaban_users"); catalog.getStreams().get(0).getConfig().setAliasName("azkaban_users"); @@ -377,9 +352,7 @@ void testUpdateConnection(final boolean useNewScheduler) throws JsonValidationEx
verify(configRepository).writeStandardSync(updatedStandardSync); - if (useNewScheduler) { - verify(eventRunner).update(connectionUpdate.getConnectionId()); - } + verify(eventRunner).update(connectionUpdate.getConnectionId()); } @Test @@ -575,33 +548,10 @@ void testSearchConnections() throws JsonValidationException, ConfigNotFoundExcep } @Test - void testDeleteConnection() throws JsonValidationException, IOException, ConfigNotFoundException { + void testDeleteConnection() { + connectionsHandler.deleteConnection(connectionId); - final ConnectionRead connectionRead = ConnectionHelpers.generateExpectedConnectionRead( - standardSync.getConnectionId(), - standardSync.getSourceId(), - standardSync.getDestinationId(), - standardSync.getOperationIds()); - - final ConnectionUpdate expectedConnectionUpdate = new ConnectionUpdate() - .namespaceDefinition(connectionRead.getNamespaceDefinition()) - .namespaceFormat(connectionRead.getNamespaceFormat()) - .prefix(connectionRead.getPrefix()) - .connectionId(connectionRead.getConnectionId()) - .operationIds(connectionRead.getOperationIds()) - .status(ConnectionStatus.DEPRECATED) - .syncCatalog(connectionRead.getSyncCatalog()) - .schedule(connectionRead.getSchedule()) - .resourceRequirements(connectionRead.getResourceRequirements()); - - final ConnectionsHandler spiedConnectionsHandler = spy(connectionsHandler); - doReturn(connectionRead).when(spiedConnectionsHandler).getConnection(connectionId); - doReturn(null).when(spiedConnectionsHandler).updateConnection(expectedConnectionUpdate); - - spiedConnectionsHandler.deleteConnection(connectionId); - - verify(spiedConnectionsHandler).getConnection(connectionId); - verify(spiedConnectionsHandler).updateConnection(expectedConnectionUpdate); + verify(eventRunner).deleteConnection(connectionId); } @Test diff --git a/airbyte-server/src/test/java/io/airbyte/server/handlers/SchedulerHandlerTest.java b/airbyte-server/src/test/java/io/airbyte/server/handlers/SchedulerHandlerTest.java index 81318cede03c..4e2180b7965b 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/handlers/SchedulerHandlerTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/handlers/SchedulerHandlerTest.java @@ -28,6 +28,7 @@ import io.airbyte.api.model.generated.DestinationDefinitionSpecificationRead; import io.airbyte.api.model.generated.DestinationIdRequestBody; import io.airbyte.api.model.generated.DestinationUpdate; +import io.airbyte.api.model.generated.JobIdRequestBody; import io.airbyte.api.model.generated.JobInfoRead; import io.airbyte.api.model.generated.SourceCoreConfig; import io.airbyte.api.model.generated.SourceDefinitionIdWithWorkspaceId; @@ -38,30 +39,20 @@ import io.airbyte.api.model.generated.SourceUpdate; import io.airbyte.commons.docker.DockerUtils; import io.airbyte.commons.enums.Enums; -import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.lang.Exceptions; import io.airbyte.config.ActorCatalog; -import io.airbyte.config.ActorDefinitionResourceRequirements; import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.DestinationConnection; -import io.airbyte.config.JobConfig; import io.airbyte.config.JobConfig.ConfigType; -import io.airbyte.config.OperatorNormalization; -import io.airbyte.config.OperatorNormalization.Option; -import io.airbyte.config.ResourceRequirements; import io.airbyte.config.SourceConnection; import io.airbyte.config.StandardCheckConnectionOutput; import io.airbyte.config.StandardDestinationDefinition; import 
io.airbyte.config.StandardSourceDefinition; -import io.airbyte.config.StandardSync; -import io.airbyte.config.StandardSyncOperation; -import io.airbyte.config.StandardSyncOperation.OperatorType; import io.airbyte.config.State; import io.airbyte.config.helpers.LogConfigs; import io.airbyte.config.persistence.ConfigNotFoundException; import io.airbyte.config.persistence.ConfigRepository; -import io.airbyte.config.persistence.SecretsRepositoryReader; import io.airbyte.config.persistence.SecretsRepositoryWriter; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.CatalogHelpers; @@ -69,28 +60,22 @@ import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.scheduler.client.EventRunner; -import io.airbyte.scheduler.client.SchedulerJobClient; import io.airbyte.scheduler.client.SynchronousJobMetadata; import io.airbyte.scheduler.client.SynchronousResponse; import io.airbyte.scheduler.client.SynchronousSchedulerClient; import io.airbyte.scheduler.models.Job; import io.airbyte.scheduler.models.JobStatus; -import io.airbyte.scheduler.persistence.JobNotifier; import io.airbyte.scheduler.persistence.JobPersistence; -import io.airbyte.scheduler.persistence.job_factory.OAuthConfigSupplier; import io.airbyte.server.converters.ConfigurationUpdate; import io.airbyte.server.converters.JobConverter; -import io.airbyte.server.helpers.ConnectionHelpers; import io.airbyte.server.helpers.DestinationHelpers; import io.airbyte.server.helpers.SourceHelpers; import io.airbyte.validation.json.JsonSchemaValidator; import io.airbyte.validation.json.JsonValidationException; import io.airbyte.workers.temporal.TemporalClient.ManualOperationResult; -import io.temporal.serviceclient.WorkflowServiceStubs; import java.io.IOException; import java.net.URI; import java.util.HashMap; -import java.util.List; import java.util.Optional; import java.util.UUID; import org.junit.jupiter.api.BeforeEach; @@ -132,17 +117,14 @@ class SchedulerHandlerTest { private SchedulerHandler schedulerHandler; private ConfigRepository configRepository; - private SecretsRepositoryReader secretsRepositoryReader; private SecretsRepositoryWriter secretsRepositoryWriter; private Job completedJob; - private SchedulerJobClient schedulerJobClient; private SynchronousSchedulerClient synchronousSchedulerClient; private SynchronousResponse jobResponse; private ConfigurationUpdate configurationUpdate; private JsonSchemaValidator jsonSchemaValidator; private JobPersistence jobPersistence; private EventRunner eventRunner; - private FeatureFlags featureFlags; private JobConverter jobConverter; @BeforeEach @@ -155,36 +137,22 @@ void setup() { when(completedJob.getConfig().getConfigType()).thenReturn(ConfigType.SYNC); when(completedJob.getScope()).thenReturn("sync:123"); - schedulerJobClient = spy(SchedulerJobClient.class); synchronousSchedulerClient = mock(SynchronousSchedulerClient.class); configRepository = mock(ConfigRepository.class); - secretsRepositoryReader = mock(SecretsRepositoryReader.class); secretsRepositoryWriter = mock(SecretsRepositoryWriter.class); jobPersistence = mock(JobPersistence.class); - final JobNotifier jobNotifier = mock(JobNotifier.class); eventRunner = mock(EventRunner.class); - featureFlags = mock(FeatureFlags.class); - when(featureFlags.usesNewScheduler()).thenReturn(false); - jobConverter = spy(new JobConverter(WorkerEnvironment.DOCKER, LogConfigs.EMPTY)); schedulerHandler = new SchedulerHandler( configRepository, secretsRepositoryWriter, - 
secretsRepositoryReader, - schedulerJobClient, synchronousSchedulerClient, configurationUpdate, jsonSchemaValidator, jobPersistence, - jobNotifier, - mock(WorkflowServiceStubs.class), - mock(OAuthConfigSupplier.class), - WorkerEnvironment.DOCKER, - LogConfigs.EMPTY, eventRunner, - featureFlags, jobConverter); } @@ -587,107 +555,6 @@ void testDiscoverSchemaForSourceFromSourceCreateFailed() throws JsonValidationEx verify(synchronousSchedulerClient).createDiscoverSchemaJob(source, SOURCE_DOCKER_IMAGE); } - @Test - void testSyncConnection() throws JsonValidationException, IOException, ConfigNotFoundException { - final StandardSync standardSync = ConnectionHelpers.generateSyncWithSourceId(UUID.randomUUID()); - final ConnectionIdRequestBody request = new ConnectionIdRequestBody().connectionId(standardSync.getConnectionId()); - final SourceConnection source = SourceHelpers.generateSource(UUID.randomUUID()).withSourceId(standardSync.getSourceId()); - final DestinationConnection destination = DestinationHelpers.generateDestination(UUID.randomUUID()) - .withDestinationId(standardSync.getDestinationId()); - final UUID operationId = standardSync.getOperationIds().get(0); - final List<StandardSyncOperation> operations = getOperations(standardSync); - - final ActorDefinitionResourceRequirements sourceResourceReqs = - new ActorDefinitionResourceRequirements().withDefault(new ResourceRequirements().withCpuRequest("1")); - when(configRepository.getStandardSourceDefinition(source.getSourceDefinitionId())) - .thenReturn(new StandardSourceDefinition() - .withDockerRepository(SOURCE_DOCKER_REPO) - .withDockerImageTag(SOURCE_DOCKER_TAG) - .withSourceDefinitionId(source.getSourceDefinitionId()) - .withResourceRequirements(sourceResourceReqs)); - final ActorDefinitionResourceRequirements destResourceReqs = - new ActorDefinitionResourceRequirements().withDefault(new ResourceRequirements().withCpuRequest("2")); - when(configRepository.getStandardDestinationDefinition(destination.getDestinationDefinitionId())) - .thenReturn(new StandardDestinationDefinition() - .withDockerRepository(DESTINATION_DOCKER_REPO) - .withDockerImageTag(DESTINATION_DOCKER_TAG) - .withDestinationDefinitionId(destination.getDestinationDefinitionId()) - .withResourceRequirements(destResourceReqs)); - when(configRepository.getStandardSync(standardSync.getConnectionId())).thenReturn(standardSync); - when(configRepository.getSourceConnection(source.getSourceId())).thenReturn(source); - when(configRepository.getDestinationConnection(destination.getDestinationId())).thenReturn(destination); - when(configRepository.getStandardSyncOperation(operationId)).thenReturn(getOperation(operationId)); - when(schedulerJobClient.createOrGetActiveSyncJob( - source, - destination, - standardSync, - SOURCE_DOCKER_IMAGE, - DESTINATION_DOCKER_IMAGE, - operations, - sourceResourceReqs, - destResourceReqs)) - .thenReturn(completedJob); - when(completedJob.getScope()).thenReturn("cat:12"); - final JobConfig jobConfig = mock(JobConfig.class); - when(completedJob.getConfig()).thenReturn(jobConfig); - when(jobConfig.getConfigType()).thenReturn(ConfigType.SYNC); - - final JobInfoRead jobStatusRead = schedulerHandler.syncConnection(request); - - assertEquals(io.airbyte.api.model.generated.JobStatus.SUCCEEDED, jobStatusRead.getJob().getStatus()); - verify(configRepository).getStandardSync(standardSync.getConnectionId()); - verify(configRepository).getSourceConnection(standardSync.getSourceId()); - verify(configRepository).getDestinationConnection(standardSync.getDestinationId()); -
verify(schedulerJobClient).createOrGetActiveSyncJob( - source, - destination, - standardSync, - SOURCE_DOCKER_IMAGE, - DESTINATION_DOCKER_IMAGE, - operations, - sourceResourceReqs, - destResourceReqs); - } - - @Test - void testResetConnection() throws JsonValidationException, IOException, ConfigNotFoundException { - final StandardSync standardSync = ConnectionHelpers.generateSyncWithSourceId(UUID.randomUUID()); - final ConnectionIdRequestBody request = new ConnectionIdRequestBody().connectionId(standardSync.getConnectionId()); - final SourceConnection source = SourceHelpers.generateSource(UUID.randomUUID()).withSourceId(standardSync.getSourceId()); - final DestinationConnection destination = DestinationHelpers.generateDestination(UUID.randomUUID()) - .withDestinationId(standardSync.getDestinationId()); - final UUID operationId = standardSync.getOperationIds().get(0); - final List<StandardSyncOperation> operations = getOperations(standardSync); - - when(configRepository.getStandardSourceDefinition(source.getSourceDefinitionId())) - .thenReturn(new StandardSourceDefinition() - .withDockerRepository(SOURCE_DOCKER_REPO) - .withDockerImageTag(SOURCE_DOCKER_TAG) - .withSourceDefinitionId(source.getSourceDefinitionId())); - when(configRepository.getStandardDestinationDefinition(destination.getDestinationDefinitionId())) - .thenReturn(new StandardDestinationDefinition() - .withDockerRepository(DESTINATION_DOCKER_REPO) - .withDockerImageTag(DESTINATION_DOCKER_TAG) - .withDestinationDefinitionId(destination.getDestinationDefinitionId())); - when(configRepository.getStandardSync(standardSync.getConnectionId())).thenReturn(standardSync); - when(configRepository.getSourceConnection(source.getSourceId())).thenReturn(source); - when(configRepository.getDestinationConnection(destination.getDestinationId())).thenReturn(destination); - when(configRepository.getStandardSyncOperation(operationId)).thenReturn(getOperation(operationId)); - when(schedulerJobClient.createOrGetActiveResetConnectionJob(destination, standardSync, DESTINATION_DOCKER_IMAGE, operations)) - .thenReturn(completedJob); - when(completedJob.getScope()).thenReturn("cat:12"); - final JobConfig jobConfig = mock(JobConfig.class); - when(completedJob.getConfig()).thenReturn(jobConfig); - when(jobConfig.getConfigType()).thenReturn(ConfigType.SYNC); - - final JobInfoRead jobStatusRead = schedulerHandler.resetConnection(request); - - assertEquals(io.airbyte.api.model.generated.JobStatus.SUCCEEDED, jobStatusRead.getJob().getStatus()); - verify(configRepository).getStandardSync(standardSync.getConnectionId()); - verify(configRepository).getDestinationConnection(standardSync.getDestinationId()); - verify(schedulerJobClient).createOrGetActiveResetConnectionJob(destination, standardSync, DESTINATION_DOCKER_IMAGE, operations); - } - @Test void testGetCurrentState() throws IOException { final UUID connectionId = UUID.randomUUID(); @@ -714,9 +581,7 @@ void testEnumConversion() { } @Test - void testNewSchedulerSync() throws JsonValidationException, ConfigNotFoundException, IOException { - when(featureFlags.usesNewScheduler()).thenReturn(true); - + void testSyncConnection() throws IOException { final UUID connectionId = UUID.randomUUID(); final long jobId = 123L; @@ -737,20 +602,51 @@ void testNewSchedulerSync() throws JsonValidationExcept verify(eventRunner).startNewManualSync(connectionId); } - private static List<StandardSyncOperation> getOperations(final StandardSync standardSync) { - if (standardSync.getOperationIds() != null && !standardSync.getOperationIds().isEmpty()) {
- return List.of(getOperation(standardSync.getOperationIds().get(0))); - } else { - return List.of(); - } + @Test + void testResetConnection() throws IOException { + final UUID connectionId = UUID.randomUUID(); + + final long jobId = 123L; + final ManualOperationResult manualOperationResult = ManualOperationResult + .builder() + .failingReason(Optional.empty()) + .jobId(Optional.of(jobId)) + .build(); + + when(eventRunner.resetConnection(connectionId)) + .thenReturn(manualOperationResult); + + doReturn(new JobInfoRead()) + .when(jobConverter).getJobInfoRead(any()); + + schedulerHandler.resetConnection(new ConnectionIdRequestBody().connectionId(connectionId)); + + verify(eventRunner).resetConnection(connectionId); } - private static StandardSyncOperation getOperation(final UUID operationId) { - return new StandardSyncOperation() - .withOperationId(operationId) - .withName(OPERATION_NAME) - .withOperatorType(OperatorType.NORMALIZATION) - .withOperatorNormalization(new OperatorNormalization().withOption(Option.BASIC)); + @Test + void testCancelJob() throws IOException { + final UUID connectionId = UUID.randomUUID(); + final long jobId = 123L; + final Job job = mock(Job.class); + when(job.getScope()).thenReturn(connectionId.toString()); + when(jobPersistence.getJob(jobId)).thenReturn(job); + + final ManualOperationResult manualOperationResult = ManualOperationResult + .builder() + .failingReason(Optional.empty()) + .jobId(Optional.of(jobId)) + .build(); + + when(eventRunner.startNewCancellation(connectionId)) + .thenReturn(manualOperationResult); + + doReturn(new JobInfoRead()) + .when(jobConverter).getJobInfoRead(any()); + + schedulerHandler.cancelJob(new JobIdRequestBody().id(jobId)); + + verify(eventRunner).startNewCancellation(connectionId); } } diff --git a/airbyte-server/src/test/java/io/airbyte/server/handlers/WebBackendConnectionsHandlerTest.java b/airbyte-server/src/test/java/io/airbyte/server/handlers/WebBackendConnectionsHandlerTest.java index c5e9fc3b1385..4be812e6753f 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/handlers/WebBackendConnectionsHandlerTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/handlers/WebBackendConnectionsHandlerTest.java @@ -587,43 +587,6 @@ void testUpdateConnectionWithUpdatedSchema() throws JsonValidationException, Con .syncCatalog(expectedWithNewSchema.getSyncCatalog()) .withRefreshedCatalog(true); - when(operationsHandler.listOperationsForConnection(any())).thenReturn(operationReadList); - when(connectionsHandler.getConnection(expected.getConnectionId())).thenReturn( - new ConnectionRead().connectionId(expected.getConnectionId())); - when(connectionsHandler.updateConnection(any())).thenReturn( - new ConnectionRead() - .connectionId(expected.getConnectionId()) - .sourceId(expected.getSourceId()) - .destinationId(expected.getDestinationId()) - .name(expected.getName()) - .namespaceDefinition(expected.getNamespaceDefinition()) - .namespaceFormat(expected.getNamespaceFormat()) - .prefix(expected.getPrefix()) - .syncCatalog(expectedWithNewSchema.getSyncCatalog()) - .status(expected.getStatus()) - .schedule(expected.getSchedule())); - - final WebBackendConnectionRead connectionRead = wbHandler.webBackendUpdateConnection(updateBody); - - assertEquals(expectedWithNewSchema.getSyncCatalog(), connectionRead.getSyncCatalog()); - - final ConnectionIdRequestBody connectionId = new ConnectionIdRequestBody().connectionId(connectionRead.getConnectionId()); - verify(schedulerHandler, times(1)).resetConnection(connectionId); - 
verify(schedulerHandler, times(1)).syncConnection(connectionId); - } - - @Test - void testUpdateConnectionWithUpdatedSchemaNewScheduler() throws JsonValidationException, ConfigNotFoundException, IOException { - final WebBackendConnectionUpdate updateBody = new WebBackendConnectionUpdate() - .namespaceDefinition(expected.getNamespaceDefinition()) - .namespaceFormat(expected.getNamespaceFormat()) - .prefix(expected.getPrefix()) - .connectionId(expected.getConnectionId()) - .schedule(expected.getSchedule()) - .status(expected.getStatus()) - .syncCatalog(expectedWithNewSchema.getSyncCatalog()) - .withRefreshedCatalog(true); - when(operationsHandler.listOperationsForConnection(any())).thenReturn(operationReadList); when(connectionsHandler.getConnection(expected.getConnectionId())).thenReturn( new ConnectionRead().connectionId(expected.getConnectionId())); @@ -640,7 +603,6 @@ void testUpdateConnectionWithUpdatedSchemaNewScheduler() throws JsonValidationEx .schedule(expected.getSchedule()); when(connectionsHandler.updateConnection(any())).thenReturn(connectionRead); when(connectionsHandler.getConnection(expected.getConnectionId())).thenReturn(connectionRead); - when(featureFlags.usesNewScheduler()).thenReturn(true); final WebBackendConnectionRead result = wbHandler.webBackendUpdateConnection(updateBody); diff --git a/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/AcceptanceTests.java b/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/AcceptanceTests.java index 957e8d37aa11..d8947ac499aa 100644 --- a/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/AcceptanceTests.java +++ b/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/AcceptanceTests.java @@ -611,13 +611,8 @@ public void testIncrementalSync() throws Exception { LOGGER.info("Starting testIncrementalSync() reset"); final JobInfoRead jobInfoRead = apiClient.getConnectionApi().resetConnection(new ConnectionIdRequestBody().connectionId(connectionId)); - final FeatureFlags featureFlags = new EnvVariableFeatureFlags(); - if (featureFlags.usesNewScheduler()) { - waitWhileJobHasStatus(apiClient.getJobsApi(), jobInfoRead.getJob(), - Sets.newHashSet(JobStatus.PENDING, JobStatus.RUNNING, JobStatus.INCOMPLETE, JobStatus.FAILED)); - } else { - waitForSuccessfulJob(apiClient.getJobsApi(), jobInfoRead.getJob()); - } + waitWhileJobHasStatus(apiClient.getJobsApi(), jobInfoRead.getJob(), + Sets.newHashSet(JobStatus.PENDING, JobStatus.RUNNING, JobStatus.INCOMPLETE, JobStatus.FAILED)); LOGGER.info("state after reset: {}", apiClient.getConnectionApi().getState(new ConnectionIdRequestBody().connectionId(connectionId))); @@ -1212,24 +1207,20 @@ public void testDeleteConnection() throws Exception { LOGGER.info("Calling delete connection a second time to test repeat call behavior..."); apiClient.getConnectionApi().deleteConnection(new ConnectionIdRequestBody().connectionId(connectionId)); - // test deletion of connection when temporal workflow is in a bad state, only when using new - // scheduler - final FeatureFlags featureFlags = new EnvVariableFeatureFlags(); - if (featureFlags.usesNewScheduler()) { - LOGGER.info("Testing connection deletion when temporal is in a terminal state"); - connectionId = createConnection(connectionName, sourceId, destinationId, List.of(operationId), catalog, null).getConnectionId(); + // test deletion of connection when temporal workflow is in a bad state + LOGGER.info("Testing connection deletion when temporal is in a terminal state"); + connectionId = 
createConnection(connectionName, sourceId, destinationId, List.of(operationId), catalog, null).getConnectionId(); - terminateTemporalWorkflow(connectionId); + terminateTemporalWorkflow(connectionId); - // we should still be able to delete the connection when the temporal workflow is in this state - apiClient.getConnectionApi().deleteConnection(new ConnectionIdRequestBody().connectionId(connectionId)); + // we should still be able to delete the connection when the temporal workflow is in this state + apiClient.getConnectionApi().deleteConnection(new ConnectionIdRequestBody().connectionId(connectionId)); - LOGGER.info("Waiting for connection to be deleted..."); - Thread.sleep(500); + LOGGER.info("Waiting for connection to be deleted..."); + Thread.sleep(500); - connectionStatus = apiClient.getConnectionApi().getConnection(new ConnectionIdRequestBody().connectionId(connectionId)).getStatus(); - assertEquals(ConnectionStatus.DEPRECATED, connectionStatus); - } + connectionStatus = apiClient.getConnectionApi().getConnection(new ConnectionIdRequestBody().connectionId(connectionId)).getStatus(); + assertEquals(ConnectionStatus.DEPRECATED, connectionStatus); } @Test @@ -1239,42 +1230,37 @@ public void testDeleteConnection() throws Exception { public void testUpdateConnectionWhenWorkflowUnreachable() throws Exception { // This test only covers the specific behavior of updating a connection that does not have an // underlying temporal workflow. - // This case only occurs with the new scheduler, so the entire test is inside the feature flag - // conditional. // Also, this test doesn't verify correctness of the schedule update applied, as adding the ability // to query a workflow for its current // schedule is out of scope for the issue (https://github.com/airbytehq/airbyte/issues/11215). This // test just ensures that the underlying workflow // is running after the update method is called. 
- final FeatureFlags featureFlags = new EnvVariableFeatureFlags(); - if (featureFlags.usesNewScheduler()) { - final String connectionName = "test-connection"; - final UUID sourceId = createPostgresSource().getSourceId(); - final UUID destinationId = createDestination().getDestinationId(); - final UUID operationId = createOperation().getOperationId(); - final AirbyteCatalog catalog = discoverSourceSchema(sourceId); - final SyncMode syncMode = SyncMode.INCREMENTAL; - final DestinationSyncMode destinationSyncMode = DestinationSyncMode.APPEND_DEDUP; - catalog.getStreams().forEach(s -> s.getConfig() - .syncMode(syncMode) - .cursorField(List.of(COLUMN_ID)) - .destinationSyncMode(destinationSyncMode) - .primaryKey(List.of(List.of(COLUMN_NAME)))); - - LOGGER.info("Testing connection update when temporal is in a terminal state"); - final UUID connectionId = createConnection(connectionName, sourceId, destinationId, List.of(operationId), catalog, null).getConnectionId(); - - terminateTemporalWorkflow(connectionId); - - // we should still be able to update the connection when the temporal workflow is in this state - updateConnectionSchedule(connectionId, new ConnectionSchedule().timeUnit(TimeUnitEnum.HOURS).units(1L)); - - LOGGER.info("Waiting for workflow to be recreated..."); - Thread.sleep(500); - - final WorkflowState workflowState = getWorkflowState(connectionId); - assertTrue(workflowState.isRunning()); - } + final String connectionName = "test-connection"; + final UUID sourceId = createPostgresSource().getSourceId(); + final UUID destinationId = createDestination().getDestinationId(); + final UUID operationId = createOperation().getOperationId(); + final AirbyteCatalog catalog = discoverSourceSchema(sourceId); + final SyncMode syncMode = SyncMode.INCREMENTAL; + final DestinationSyncMode destinationSyncMode = DestinationSyncMode.APPEND_DEDUP; + catalog.getStreams().forEach(s -> s.getConfig() + .syncMode(syncMode) + .cursorField(List.of(COLUMN_ID)) + .destinationSyncMode(destinationSyncMode) + .primaryKey(List.of(List.of(COLUMN_NAME)))); + + LOGGER.info("Testing connection update when temporal is in a terminal state"); + final UUID connectionId = createConnection(connectionName, sourceId, destinationId, List.of(operationId), catalog, null).getConnectionId(); + + terminateTemporalWorkflow(connectionId); + + // we should still be able to update the connection when the temporal workflow is in this state + updateConnectionSchedule(connectionId, new ConnectionSchedule().timeUnit(TimeUnitEnum.HOURS).units(1L)); + + LOGGER.info("Waiting for workflow to be recreated..."); + Thread.sleep(500); + + final WorkflowState workflowState = getWorkflowState(connectionId); + assertTrue(workflowState.isRunning()); } @Test @@ -1284,55 +1270,50 @@ public void testUpdateConnectionWhenWorkflowUnreachable() throws Exception { public void testManualSyncRepairsWorkflowWhenWorkflowUnreachable() throws Exception { // This test only covers the specific behavior of updating a connection that does not have an // underlying temporal workflow. - // This case only occurs with the new scheduler, so the entire test is inside the feature flag - // conditional. 
- final FeatureFlags featureFlags = new EnvVariableFeatureFlags(); - if (featureFlags.usesNewScheduler()) { - final String connectionName = "test-connection"; - final SourceDefinitionRead sourceDefinition = createE2eSourceDefinition(); - final SourceRead source = createSource( - "E2E Test Source -" + UUID.randomUUID(), - workspaceId, - sourceDefinition.getSourceDefinitionId(), - Jsons.jsonNode(ImmutableMap.builder() - .put("type", "INFINITE_FEED") - .put("max_records", 5000) - .put("message_interval", 100) - .build())); - final UUID sourceId = source.getSourceId(); - final UUID destinationId = createDestination().getDestinationId(); - final UUID operationId = createOperation().getOperationId(); - final AirbyteCatalog catalog = discoverSourceSchema(sourceId); - final SyncMode syncMode = SyncMode.INCREMENTAL; - final DestinationSyncMode destinationSyncMode = DestinationSyncMode.APPEND_DEDUP; - catalog.getStreams().forEach(s -> s.getConfig() - .syncMode(syncMode) - .cursorField(List.of(COLUMN_ID)) - .destinationSyncMode(destinationSyncMode) - .primaryKey(List.of(List.of(COLUMN_NAME)))); - - LOGGER.info("Testing manual sync when temporal is in a terminal state"); - final UUID connectionId = createConnection(connectionName, sourceId, destinationId, List.of(operationId), catalog, null).getConnectionId(); - - LOGGER.info("Starting first manual sync"); - final JobInfoRead firstJobInfo = apiClient.getConnectionApi().syncConnection(new ConnectionIdRequestBody().connectionId(connectionId)); - LOGGER.info("Terminating workflow during first sync"); - terminateTemporalWorkflow(connectionId); - - LOGGER.info("Submitted another manual sync"); - apiClient.getConnectionApi().syncConnection(new ConnectionIdRequestBody().connectionId(connectionId)); - - LOGGER.info("Waiting for workflow to be recreated..."); - Thread.sleep(500); - - final WorkflowState workflowState = getWorkflowState(connectionId); - assertTrue(workflowState.isRunning()); - assertTrue(workflowState.isSkipScheduling()); - - // verify that the first manual sync was marked as failed - final JobInfoRead terminatedJobInfo = apiClient.getJobsApi().getJobInfo(new JobIdRequestBody().id(firstJobInfo.getJob().getId())); - assertEquals(JobStatus.FAILED, terminatedJobInfo.getJob().getStatus()); - } + final String connectionName = "test-connection"; + final SourceDefinitionRead sourceDefinition = createE2eSourceDefinition(); + final SourceRead source = createSource( + "E2E Test Source -" + UUID.randomUUID(), + workspaceId, + sourceDefinition.getSourceDefinitionId(), + Jsons.jsonNode(ImmutableMap.builder() + .put("type", "INFINITE_FEED") + .put("max_records", 5000) + .put("message_interval", 100) + .build())); + final UUID sourceId = source.getSourceId(); + final UUID destinationId = createDestination().getDestinationId(); + final UUID operationId = createOperation().getOperationId(); + final AirbyteCatalog catalog = discoverSourceSchema(sourceId); + final SyncMode syncMode = SyncMode.INCREMENTAL; + final DestinationSyncMode destinationSyncMode = DestinationSyncMode.APPEND_DEDUP; + catalog.getStreams().forEach(s -> s.getConfig() + .syncMode(syncMode) + .cursorField(List.of(COLUMN_ID)) + .destinationSyncMode(destinationSyncMode) + .primaryKey(List.of(List.of(COLUMN_NAME)))); + + LOGGER.info("Testing manual sync when temporal is in a terminal state"); + final UUID connectionId = createConnection(connectionName, sourceId, destinationId, List.of(operationId), catalog, null).getConnectionId(); + + LOGGER.info("Starting first manual sync"); + final JobInfoRead 
firstJobInfo = apiClient.getConnectionApi().syncConnection(new ConnectionIdRequestBody().connectionId(connectionId)); + LOGGER.info("Terminating workflow during first sync"); + terminateTemporalWorkflow(connectionId); + + LOGGER.info("Submitted another manual sync"); + apiClient.getConnectionApi().syncConnection(new ConnectionIdRequestBody().connectionId(connectionId)); + + LOGGER.info("Waiting for workflow to be recreated..."); + Thread.sleep(500); + + final WorkflowState workflowState = getWorkflowState(connectionId); + assertTrue(workflowState.isRunning()); + assertTrue(workflowState.isSkipScheduling()); + + // verify that the first manual sync was marked as failed + final JobInfoRead terminatedJobInfo = apiClient.getJobsApi().getJobInfo(new JobIdRequestBody().id(firstJobInfo.getJob().getId())); + assertEquals(JobStatus.FAILED, terminatedJobInfo.getJob().getStatus()); } @Test @@ -1342,37 +1323,32 @@ public void testManualSyncRepairsWorkflowWhenWorkflowUnreachable() throws Except public void testResetConnectionRepairsWorkflowWhenWorkflowUnreachable() throws Exception { // This test only covers the specific behavior of updating a connection that does not have an // underlying temporal workflow. - // This case only occurs with the new scheduler, so the entire test is inside the feature flag - // conditional. - final FeatureFlags featureFlags = new EnvVariableFeatureFlags(); - if (featureFlags.usesNewScheduler()) { - final String connectionName = "test-connection"; - final UUID sourceId = createPostgresSource().getSourceId(); - final UUID destinationId = createDestination().getDestinationId(); - final UUID operationId = createOperation().getOperationId(); - final AirbyteCatalog catalog = discoverSourceSchema(sourceId); - final SyncMode syncMode = SyncMode.INCREMENTAL; - final DestinationSyncMode destinationSyncMode = DestinationSyncMode.APPEND_DEDUP; - catalog.getStreams().forEach(s -> s.getConfig() - .syncMode(syncMode) - .cursorField(List.of(COLUMN_ID)) - .destinationSyncMode(destinationSyncMode) - .primaryKey(List.of(List.of(COLUMN_NAME)))); - - LOGGER.info("Testing reset connection when temporal is in a terminal state"); - final UUID connectionId = createConnection(connectionName, sourceId, destinationId, List.of(operationId), catalog, null).getConnectionId(); - - terminateTemporalWorkflow(connectionId); - - apiClient.getConnectionApi().resetConnection(new ConnectionIdRequestBody().connectionId(connectionId)); - - LOGGER.info("Waiting for workflow to be recreated..."); - Thread.sleep(500); - - final WorkflowState workflowState = getWorkflowState(connectionId); - assertTrue(workflowState.isRunning()); - assertTrue(workflowState.isResetConnection()); - } + final String connectionName = "test-connection"; + final UUID sourceId = createPostgresSource().getSourceId(); + final UUID destinationId = createDestination().getDestinationId(); + final UUID operationId = createOperation().getOperationId(); + final AirbyteCatalog catalog = discoverSourceSchema(sourceId); + final SyncMode syncMode = SyncMode.INCREMENTAL; + final DestinationSyncMode destinationSyncMode = DestinationSyncMode.APPEND_DEDUP; + catalog.getStreams().forEach(s -> s.getConfig() + .syncMode(syncMode) + .cursorField(List.of(COLUMN_ID)) + .destinationSyncMode(destinationSyncMode) + .primaryKey(List.of(List.of(COLUMN_NAME)))); + + LOGGER.info("Testing reset connection when temporal is in a terminal state"); + final UUID connectionId = createConnection(connectionName, sourceId, destinationId, List.of(operationId), catalog, 
null).getConnectionId(); + + terminateTemporalWorkflow(connectionId); + + apiClient.getConnectionApi().resetConnection(new ConnectionIdRequestBody().connectionId(connectionId)); + + LOGGER.info("Waiting for workflow to be recreated..."); + Thread.sleep(500); + + final WorkflowState workflowState = getWorkflowState(connectionId); + assertTrue(workflowState.isRunning()); + assertTrue(workflowState.isResetConnection()); } private WorkflowClient getWorkflowClient() { diff --git a/docker-compose.yaml b/docker-compose.yaml index 7711fa365edf..d35f62962332 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -71,7 +71,6 @@ services: - LOCAL_ROOT=${LOCAL_ROOT} - LOCAL_DOCKER_MOUNT=${LOCAL_DOCKER_MOUNT} - LOG_LEVEL=${LOG_LEVEL} - - NEW_SCHEDULER=${NEW_SCHEDULER} - SECRET_PERSISTENCE=${SECRET_PERSISTENCE} - SYNC_JOB_MAX_ATTEMPTS=${SYNC_JOB_MAX_ATTEMPTS} - SYNC_JOB_MAX_TIMEOUT_DAYS=${SYNC_JOB_MAX_TIMEOUT_DAYS} @@ -149,7 +148,6 @@ services: - JOB_MAIN_CONTAINER_MEMORY_REQUEST=${JOB_MAIN_CONTAINER_MEMORY_REQUEST} - JOBS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION=${JOBS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION:-} - LOG_LEVEL=${LOG_LEVEL} - - NEW_SCHEDULER=${NEW_SCHEDULER} - SECRET_PERSISTENCE=${SECRET_PERSISTENCE} - TEMPORAL_HOST=${TEMPORAL_HOST} - TRACKING_STRATEGY=${TRACKING_STRATEGY} diff --git a/kube/overlays/dev-integration-test-schedulerv2/.env b/kube/overlays/dev-integration-test-schedulerv2/.env deleted file mode 100644 index 6b923016fa5b..000000000000 --- a/kube/overlays/dev-integration-test-schedulerv2/.env +++ /dev/null @@ -1,68 +0,0 @@ -AIRBYTE_VERSION=dev - -# Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db -DATABASE_HOST=airbyte-db-svc -DATABASE_PORT=5432 -DATABASE_DB=airbyte -# translate manually DATABASE_URL=jdbc:postgresql://${DATABASE_HOST}:${DATABASE_PORT/${DATABASE_DB} -DATABASE_URL=jdbc:postgresql://airbyte-db-svc:5432/airbyte -JOBS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION=0.29.15.001 -CONFIGS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION=0.35.15.001 - -# When using the airbyte-db via default docker image: -CONFIG_ROOT=/configs -DATA_DOCKER_MOUNT=airbyte_data -DB_DOCKER_MOUNT=airbyte_db - -# Temporal.io worker configuration -TEMPORAL_HOST=airbyte-temporal-svc:7233 -TEMPORAL_WORKER_PORTS=9001,9002,9003,9004,9005,9006,9007,9008,9009,9010,9011,9012,9013,9014,9015,9016,9017,9018,9019,9020,9021,9022,9023,9024,9025,9026,9027,9028,9029,9030,9031,9032,9033,9034,9035,9036,9037,9038,9039,9040 - -# Workspace storage for running jobs (logs, etc) -WORKSPACE_ROOT=/workspace -WORKSPACE_DOCKER_MOUNT=airbyte_workspace - -LOCAL_ROOT=/tmp/airbyte_local - -# Maximum total simultaneous jobs across all worker nodes -SUBMITTER_NUM_THREADS=10 - -# Miscellaneous -TRACKING_STRATEGY=logging -WEBAPP_URL=airbyte-webapp-svc:80 -API_URL=/api/v1/ -INTERNAL_API_HOST=airbyte-server-svc:8001 - -WORKER_ENVIRONMENT=kubernetes -FULLSTORY=disabled -IS_DEMO=false -LOG_LEVEL=INFO - -# S3/Minio Log Configuration -S3_LOG_BUCKET=airbyte-dev-logs -S3_LOG_BUCKET_REGION= -S3_MINIO_ENDPOINT=http://airbyte-minio-svc:9000 -S3_PATH_STYLE_ACCESS=true - -# GCS Log Configuration -GCS_LOG_BUCKET= - -# State Storage Configuration -STATE_STORAGE_MINIO_BUCKET_NAME=airbyte-dev-logs -STATE_STORAGE_MINIO_ENDPOINT=http://airbyte-minio-svc:9000 - -# Docker Resource Limits -JOB_MAIN_CONTAINER_CPU_REQUEST= -JOB_MAIN_CONTAINER_CPU_LIMIT= -JOB_MAIN_CONTAINER_MEMORY_REQUEST= -JOB_MAIN_CONTAINER_MEMORY_LIMIT= - -# Worker pod tolerations and node selectors -JOB_KUBE_TOLERATIONS= 
-JOB_KUBE_NODE_SELECTORS= - -# Job image pull policy -JOB_KUBE_MAIN_CONTAINER_IMAGE_PULL_POLICY= - -# Launch a separate pod to orchestrate sync steps -CONTAINER_ORCHESTRATOR_ENABLED=true diff --git a/kube/overlays/dev-integration-test-schedulerv2/.secrets b/kube/overlays/dev-integration-test-schedulerv2/.secrets deleted file mode 100644 index 67e8f4aae977..000000000000 --- a/kube/overlays/dev-integration-test-schedulerv2/.secrets +++ /dev/null @@ -1,7 +0,0 @@ -DATABASE_USER=docker -DATABASE_PASSWORD=docker -AWS_ACCESS_KEY_ID=minio -AWS_SECRET_ACCESS_KEY=minio123 -GOOGLE_APPLICATION_CREDENTIALS= -STATE_STORAGE_MINIO_ACCESS_KEY=minio -STATE_STORAGE_MINIO_SECRET_ACCESS_KEY=minio123 diff --git a/kube/overlays/dev-integration-test-schedulerv2/bootloader-patch.yaml b/kube/overlays/dev-integration-test-schedulerv2/bootloader-patch.yaml deleted file mode 100644 index 381763fc6af0..000000000000 --- a/kube/overlays/dev-integration-test-schedulerv2/bootloader-patch.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: v1 -kind: Pod -metadata: - name: airbyte-bootloader -spec: - containers: - - name: airbyte-bootloader-container - env: - - name: NEW_SCHEDULER - valueFrom: - configMapKeyRef: - name: airbyte-env - key: NEW_SCHEDULER diff --git a/kube/overlays/dev-integration-test-schedulerv2/kustomization.yaml b/kube/overlays/dev-integration-test-schedulerv2/kustomization.yaml deleted file mode 100644 index 06c4a8222a5c..000000000000 --- a/kube/overlays/dev-integration-test-schedulerv2/kustomization.yaml +++ /dev/null @@ -1,41 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization - -namespace: default - -bases: - - ../../resources - -resources: - - sync-only-worker.yaml - - temporal-ui.yaml - -images: - - name: airbyte/db - newTag: dev - - name: airbyte/bootloader - newTag: dev - - name: airbyte/scheduler - newTag: dev - - name: airbyte/server - newTag: dev - - name: airbyte/webapp - newTag: dev - - name: airbyte/worker - newTag: dev - - name: temporalio/auto-setup - newTag: 1.7.0 - -configMapGenerator: - - name: airbyte-env - env: .env - -secretGenerator: - - name: airbyte-secrets - env: .secrets - -patchesStrategicMerge: - - bootloader-patch.yaml - - server-patch.yaml - - scheduler-patch.yaml - - worker-patch.yaml diff --git a/kube/overlays/dev-integration-test-schedulerv2/scheduler-patch.yaml b/kube/overlays/dev-integration-test-schedulerv2/scheduler-patch.yaml deleted file mode 100644 index e0e1ea4277b8..000000000000 --- a/kube/overlays/dev-integration-test-schedulerv2/scheduler-patch.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: airbyte-scheduler -spec: - template: - spec: - containers: - - name: airbyte-scheduler-container - env: - - name: NEW_SCHEDULER - valueFrom: - configMapKeyRef: - name: airbyte-env - key: NEW_SCHEDULER diff --git a/kube/overlays/dev-integration-test-schedulerv2/server-patch.yaml b/kube/overlays/dev-integration-test-schedulerv2/server-patch.yaml deleted file mode 100644 index b5ffca6a98f1..000000000000 --- a/kube/overlays/dev-integration-test-schedulerv2/server-patch.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: airbyte-server -spec: - template: - spec: - containers: - - name: airbyte-server-container - env: - - name: NEW_SCHEDULER - valueFrom: - configMapKeyRef: - name: airbyte-env - key: NEW_SCHEDULER diff --git a/kube/overlays/dev-integration-test-schedulerv2/sync-only-worker.yaml b/kube/overlays/dev-integration-test-schedulerv2/sync-only-worker.yaml deleted file 
mode 100644 index c9547abdbc91..000000000000 --- a/kube/overlays/dev-integration-test-schedulerv2/sync-only-worker.yaml +++ /dev/null @@ -1,230 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: airbyte-sync-worker -spec: - replicas: 1 - selector: - matchLabels: - airbyte: sync-worker - template: - metadata: - labels: - airbyte: sync-worker - spec: - serviceAccountName: airbyte-admin - automountServiceAccountToken: true - containers: - - name: airbyte-worker-container - image: airbyte/worker - env: - - name: AIRBYTE_VERSION - valueFrom: - configMapKeyRef: - name: airbyte-env - key: AIRBYTE_VERSION - - name: CONFIG_ROOT - valueFrom: - configMapKeyRef: - name: airbyte-env - key: CONFIG_ROOT - - name: DATABASE_HOST - valueFrom: - configMapKeyRef: - name: airbyte-env - key: DATABASE_HOST - - name: DATABASE_PORT - valueFrom: - configMapKeyRef: - name: airbyte-env - key: DATABASE_PORT - - name: DATABASE_PASSWORD - valueFrom: - secretKeyRef: - name: airbyte-secrets - key: DATABASE_PASSWORD - - name: DATABASE_URL - valueFrom: - configMapKeyRef: - name: airbyte-env - key: DATABASE_URL - - name: DATABASE_USER - valueFrom: - secretKeyRef: - name: airbyte-secrets - key: DATABASE_USER - - name: TRACKING_STRATEGY - valueFrom: - configMapKeyRef: - name: airbyte-env - key: TRACKING_STRATEGY - - name: WORKSPACE_DOCKER_MOUNT - value: workspace - - name: WORKSPACE_ROOT - valueFrom: - configMapKeyRef: - name: airbyte-env - key: WORKSPACE_ROOT - - name: WORKER_ENVIRONMENT - valueFrom: - configMapKeyRef: - name: airbyte-env - key: WORKER_ENVIRONMENT - - name: LOCAL_ROOT - valueFrom: - configMapKeyRef: - name: airbyte-env - key: LOCAL_ROOT - - name: WEBAPP_URL - valueFrom: - configMapKeyRef: - name: airbyte-env - key: WEBAPP_URL - - name: TEMPORAL_HOST - valueFrom: - configMapKeyRef: - name: airbyte-env - key: TEMPORAL_HOST - - name: TEMPORAL_WORKER_PORTS - valueFrom: - configMapKeyRef: - name: airbyte-env - key: TEMPORAL_WORKER_PORTS - - name: LOG_LEVEL - valueFrom: - configMapKeyRef: - name: airbyte-env - key: LOG_LEVEL - - name: JOB_KUBE_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: SUBMITTER_NUM_THREADS - valueFrom: - configMapKeyRef: - name: airbyte-env - key: SUBMITTER_NUM_THREADS - - name: JOB_MAIN_CONTAINER_CPU_REQUEST - valueFrom: - configMapKeyRef: - name: airbyte-env - key: JOB_MAIN_CONTAINER_CPU_REQUEST - - name: JOB_MAIN_CONTAINER_CPU_LIMIT - valueFrom: - configMapKeyRef: - name: airbyte-env - key: JOB_MAIN_CONTAINER_CPU_LIMIT - - name: JOB_MAIN_CONTAINER_MEMORY_REQUEST - valueFrom: - configMapKeyRef: - name: airbyte-env - key: JOB_MAIN_CONTAINER_MEMORY_REQUEST - - name: JOB_MAIN_CONTAINER_MEMORY_LIMIT - valueFrom: - configMapKeyRef: - name: airbyte-env - key: JOB_MAIN_CONTAINER_MEMORY_LIMIT - - name: S3_LOG_BUCKET - valueFrom: - configMapKeyRef: - name: airbyte-env - key: S3_LOG_BUCKET - - name: S3_LOG_BUCKET_REGION - valueFrom: - configMapKeyRef: - name: airbyte-env - key: S3_LOG_BUCKET_REGION - - name: AWS_ACCESS_KEY_ID - valueFrom: - secretKeyRef: - name: airbyte-secrets - key: AWS_ACCESS_KEY_ID - - name: AWS_SECRET_ACCESS_KEY - valueFrom: - secretKeyRef: - name: airbyte-secrets - key: AWS_SECRET_ACCESS_KEY - - name: S3_MINIO_ENDPOINT - valueFrom: - configMapKeyRef: - name: airbyte-env - key: S3_MINIO_ENDPOINT - - name: S3_PATH_STYLE_ACCESS - valueFrom: - configMapKeyRef: - name: airbyte-env - key: S3_PATH_STYLE_ACCESS - - name: GOOGLE_APPLICATION_CREDENTIALS - valueFrom: - secretKeyRef: - name: airbyte-secrets - key: GOOGLE_APPLICATION_CREDENTIALS - 
- name: GCS_LOG_BUCKET - valueFrom: - configMapKeyRef: - name: airbyte-env - key: GCS_LOG_BUCKET - - name: INTERNAL_API_HOST - valueFrom: - configMapKeyRef: - name: airbyte-env - key: INTERNAL_API_HOST - - name: JOB_KUBE_TOLERATIONS - valueFrom: - configMapKeyRef: - name: airbyte-env - key: JOB_KUBE_TOLERATIONS - - name: JOB_KUBE_NODE_SELECTORS - valueFrom: - configMapKeyRef: - name: airbyte-env - key: JOB_KUBE_NODE_SELECTORS - - name: JOB_KUBE_MAIN_CONTAINER_IMAGE_PULL_POLICY - valueFrom: - configMapKeyRef: - name: airbyte-env - key: JOB_KUBE_MAIN_CONTAINER_IMAGE_PULL_POLICY - # todo: add other state storage keys - - name: STATE_STORAGE_MINIO_BUCKET_NAME - valueFrom: - configMapKeyRef: - name: airbyte-env - key: STATE_STORAGE_MINIO_BUCKET_NAME - - name: STATE_STORAGE_MINIO_ENDPOINT - valueFrom: - configMapKeyRef: - name: airbyte-env - key: STATE_STORAGE_MINIO_ENDPOINT - - name: STATE_STORAGE_MINIO_ACCESS_KEY - valueFrom: - secretKeyRef: - name: airbyte-secrets - key: STATE_STORAGE_MINIO_ACCESS_KEY - - name: STATE_STORAGE_MINIO_SECRET_ACCESS_KEY - valueFrom: - secretKeyRef: - name: airbyte-secrets - key: STATE_STORAGE_MINIO_SECRET_ACCESS_KEY - - name: CONTAINER_ORCHESTRATOR_ENABLED - valueFrom: - configMapKeyRef: - name: airbyte-env - key: CONTAINER_ORCHESTRATOR_ENABLED - - name: SHOULD_RUN_GET_SPEC_WORKFLOWS - value: "false" - - name: SHOULD_RUN_CHECK_CONNECTION_WORKFLOWS - value: "false" - - name: SHOULD_RUN_DISCOVER_WORKFLOWS - value: "false" - - name: SHOULD_RUN_SYNC_WORKFLOWS - value: "true" - - name: SHOULD_RUN_CONNECTION_MANAGER_WORKFLOWS - value: "false" - volumeMounts: - - name: gcs-log-creds-volume - mountPath: /secrets/gcs-log-creds - readOnly: true - volumes: - - name: gcs-log-creds-volume - secret: - secretName: gcs-log-creds diff --git a/kube/overlays/dev-integration-test-schedulerv2/temporal-ui.yaml b/kube/overlays/dev-integration-test-schedulerv2/temporal-ui.yaml deleted file mode 100644 index 541c3ad2ca12..000000000000 --- a/kube/overlays/dev-integration-test-schedulerv2/temporal-ui.yaml +++ /dev/null @@ -1,37 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: airbyte-temporal-ui -spec: - type: NodePort - ports: - - port: 8088 - protocol: TCP - selector: - airbyte: temporal-ui ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: airbyte-temporal-ui -spec: - replicas: 1 - selector: - matchLabels: - airbyte: temporal-ui - template: - metadata: - labels: - airbyte: temporal-ui - spec: - containers: - - name: airbyte-temporal-ui-container - image: temporalio/web:1.13.0 - env: - - name: TEMPORAL_GRPC_ENDPOINT - valueFrom: - configMapKeyRef: - name: airbyte-env - key: TEMPORAL_HOST - - name: TEMPORAL_PERMIT_WRITE_API - value: "true" diff --git a/kube/overlays/dev-integration-test-schedulerv2/worker-patch.yaml b/kube/overlays/dev-integration-test-schedulerv2/worker-patch.yaml deleted file mode 100644 index da72c3630f95..000000000000 --- a/kube/overlays/dev-integration-test-schedulerv2/worker-patch.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: airbyte-worker -spec: - template: - spec: - containers: - - name: airbyte-worker-container - env: - - name: CONTAINER_ORCHESTRATOR_ENABLED - valueFrom: - configMapKeyRef: - name: airbyte-env - key: CONTAINER_ORCHESTRATOR_ENABLED - - name: SHOULD_RUN_SYNC_WORKFLOWS - value: "false" From f0a1df788b8323f1e0b50f0708b4ddfb6c992385 Mon Sep 17 00:00:00 2001 From: lmossman Date: Wed, 1 Jun 2022 18:15:46 -0700 Subject: [PATCH 02/14] remove airbyte-scheduler from deployments and docs --- 
.bumpversion.cfg | 2 - .../metrics/lib/MetricEmittingApps.java | 1 - airbyte-scheduler/client/readme.md | 1 - .../scheduler/client/SchedulerJobClient.java | 40 --- airbyte-server/build.gradle | 1 - build.gradle | 1 - .../templates/scheduler/deployment.yaml | 249 ------------------ docker-compose-cloud.build.yaml | 7 - docker-compose.build.yaml | 7 - docker-compose.yaml | 37 --- docs/deploying-airbyte/on-kubernetes.md | 4 - docs/operator-guides/upgrading-airbyte.md | 2 +- docs/troubleshooting/on-deploying.md | 9 +- .../dev-integration-test/kustomization.yaml | 2 - kube/overlays/dev/kustomization.yaml | 2 - .../kustomization.yaml | 2 - .../set-resource-limits.yaml | 20 -- kube/overlays/stable/kustomization.yaml | 2 - ...ccount.yaml => admin-service-account.yaml} | 0 kube/resources/kustomization.yaml | 3 +- kube/resources/scheduler.yaml | 177 ------------- settings.gradle | 1 - tools/bin/acceptance_test_kube.sh | 6 +- 23 files changed, 7 insertions(+), 569 deletions(-) delete mode 100644 airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/SchedulerJobClient.java delete mode 100644 charts/airbyte/templates/scheduler/deployment.yaml rename kube/resources/{scheduler-service-account.yaml => admin-service-account.yaml} (100%) delete mode 100644 kube/resources/scheduler.yaml diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 3902ab0e940f..530f0dbbe695 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -16,8 +16,6 @@ serialize = [bumpversion:file:airbyte-metrics/reporter/Dockerfile] -[bumpversion:file:airbyte-scheduler/app/Dockerfile] - [bumpversion:file:airbyte-server/Dockerfile] [bumpversion:file:airbyte-webapp/package.json] diff --git a/airbyte-metrics/lib/src/main/java/io/airbyte/metrics/lib/MetricEmittingApps.java b/airbyte-metrics/lib/src/main/java/io/airbyte/metrics/lib/MetricEmittingApps.java index 0c1aaa9b8969..c54667d8a775 100644 --- a/airbyte-metrics/lib/src/main/java/io/airbyte/metrics/lib/MetricEmittingApps.java +++ b/airbyte-metrics/lib/src/main/java/io/airbyte/metrics/lib/MetricEmittingApps.java @@ -22,7 +22,6 @@ public enum MetricEmittingApps implements MetricEmittingApp { METRICS_REPORTER("metrics-reporter"), - SCHEDULER("scheduler"), WORKER("worker"); private String applicationName; diff --git a/airbyte-scheduler/client/readme.md b/airbyte-scheduler/client/readme.md index a74ab59ba7b5..a9e529fd5203 100644 --- a/airbyte-scheduler/client/readme.md +++ b/airbyte-scheduler/client/readme.md @@ -3,5 +3,4 @@ Java clients for submitting Jobs. ## Key Files -* `SchedulerJobClient` - interface for scheduling _asynchronous_ jobs (i.e. sync and reset). * `SynchronousSchedulerClient` - interface for scheduling _synchronous_ jobs. diff --git a/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/SchedulerJobClient.java b/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/SchedulerJobClient.java deleted file mode 100644 index 0670dc4e5988..000000000000 --- a/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/SchedulerJobClient.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
- */
-
-package io.airbyte.scheduler.client;
-
-import io.airbyte.config.ActorDefinitionResourceRequirements;
-import io.airbyte.config.DestinationConnection;
-import io.airbyte.config.SourceConnection;
-import io.airbyte.config.StandardSync;
-import io.airbyte.config.StandardSyncOperation;
-import io.airbyte.scheduler.models.Job;
-import java.io.IOException;
-import java.util.List;
-import javax.annotation.Nullable;
-
-/**
- * Exposes a way of executing short-lived jobs as RPC calls. If it returns successfully, it
- * guarantees a job was submitted. It does not wait for that job to complete. Jobs submitted in by
- * this client are persisted in the Jobs table. It returns the full job object.
- */
-public interface SchedulerJobClient {
-
-  Job createOrGetActiveSyncJob(SourceConnection source,
-                               DestinationConnection destination,
-                               StandardSync standardSync,
-                               String sourceDockerImage,
-                               String destinationDockerImage,
-                               List<StandardSyncOperation> standardSyncOperations,
-                               @Nullable ActorDefinitionResourceRequirements sourceResourceRequirements,
-                               @Nullable ActorDefinitionResourceRequirements destinationResourceRequirements)
-      throws IOException;
-
-  Job createOrGetActiveResetConnectionJob(DestinationConnection destination,
-                                          StandardSync standardSync,
-                                          String destinationDockerImage,
-                                          List<StandardSyncOperation> standardSyncOperations)
-      throws IOException;
-
-}
diff --git a/airbyte-server/build.gradle b/airbyte-server/build.gradle
index c108f322221c..4cd7178d695f 100644
--- a/airbyte-server/build.gradle
+++ b/airbyte-server/build.gradle
@@ -79,7 +79,6 @@ dependencies {
     implementation project(':airbyte-notification')
     implementation project(':airbyte-oauth')
     implementation project(':airbyte-protocol:models')
-    implementation project(':airbyte-scheduler:app')
     implementation project(':airbyte-scheduler:client')
     implementation project(':airbyte-scheduler:models')
     implementation project(':airbyte-scheduler:persistence')
diff --git a/build.gradle b/build.gradle
index 15e48e3b357c..72e40e3191ae 100644
--- a/build.gradle
+++ b/build.gradle
@@ -440,7 +440,6 @@ task('generate-docker') {
     dependsOn(':airbyte-workers:assemble')
     dependsOn(':airbyte-webapp:assemble')
     dependsOn(':airbyte-server:assemble')
-    dependsOn(':airbyte-scheduler:app:assemble')
     dependsOn(':airbyte-db:lib:assemble')
     dependsOn(':airbyte-config:init:assemble')
     dependsOn(':airbyte-temporal:assemble')
diff --git a/charts/airbyte/templates/scheduler/deployment.yaml b/charts/airbyte/templates/scheduler/deployment.yaml
deleted file mode 100644
index eacd3e55e93d..000000000000
--- a/charts/airbyte/templates/scheduler/deployment.yaml
+++ /dev/null
@@ -1,249 +0,0 @@
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: {{ include "common.names.fullname" . }}-scheduler
-  labels:
-    {{- include "airbyte.labels" . | nindent 4 }}
-spec:
-  replicas: {{ .Values.scheduler.replicaCount }}
-  selector:
-    matchLabels:
-      airbyte: scheduler
-  template:
-    metadata:
-      labels:
-        airbyte: scheduler
-      {{- if .Values.scheduler.podAnnotations }}
-      annotations:
-        {{- include "common.tplvalues.render" (dict "value" .Values.scheduler.podAnnotations "context" $) | nindent 8 }}
-      {{- end }}
-    spec:
-      serviceAccountName: {{ include "airbyte.serviceAccountName" .
}} - {{- if .Values.scheduler.nodeSelector }} - nodeSelector: {{- include "common.tplvalues.render" (dict "value" .Values.scheduler.nodeSelector "context" $) | nindent 8 }} - {{- end }} - {{- if .Values.scheduler.tolerations }} - tolerations: {{- include "common.tplvalues.render" (dict "value" .Values.scheduler.tolerations "context" $) | nindent 8 }} - {{- end }} - {{- if .Values.scheduler.affinity }} - affinity: {{- include "common.tplvalues.render" (dict "value" .Values.scheduler.affinity "context" $) | nindent 8 }} - {{- end }} - containers: - - name: airbyte-scheduler-container - image: {{ include "airbyte.schedulerImage" . }} - imagePullPolicy: "{{ .Values.scheduler.image.pullPolicy }}" - env: - - name: AIRBYTE_VERSION - valueFrom: - configMapKeyRef: - name: {{ include "common.names.fullname" . }}-env - key: AIRBYTE_VERSION - - name: CONFIG_ROOT - valueFrom: - configMapKeyRef: - name: {{ include "common.names.fullname" . }}-env - key: CONFIG_ROOT - - name: DATABASE_HOST - valueFrom: - configMapKeyRef: - name: {{ include "common.names.fullname" . }}-env - key: DATABASE_HOST - - name: DATABASE_PORT - valueFrom: - configMapKeyRef: - name: {{ include "common.names.fullname" . }}-env - key: DATABASE_PORT - - name: DATABASE_PASSWORD - valueFrom: - secretKeyRef: - name: {{ include "airbyte.database.secret.name" . }} - key: {{ include "airbyte.database.secret.passwordKey" . }} - - name: DATABASE_URL - valueFrom: - configMapKeyRef: - name: {{ include "common.names.fullname" . }}-env - key: DATABASE_URL - - name: DATABASE_USER - valueFrom: - secretKeyRef: - name: {{ include "common.names.fullname" . }}-secrets - key: DATABASE_USER - - name: TRACKING_STRATEGY - valueFrom: - configMapKeyRef: - name: {{ include "common.names.fullname" . }}-env - key: TRACKING_STRATEGY - - name: WORKSPACE_DOCKER_MOUNT - value: workspace - - name: WORKSPACE_ROOT - valueFrom: - configMapKeyRef: - name: {{ include "common.names.fullname" . }}-env - key: WORKSPACE_ROOT - - name: WORKER_ENVIRONMENT - valueFrom: - configMapKeyRef: - name: {{ include "common.names.fullname" . }}-env - key: WORKER_ENVIRONMENT - - name: LOCAL_ROOT - valueFrom: - configMapKeyRef: - name: {{ include "common.names.fullname" . }}-env - key: LOCAL_ROOT - - name: WEBAPP_URL - valueFrom: - configMapKeyRef: - name: {{ include "common.names.fullname" . }}-env - key: WEBAPP_URL - - name: TEMPORAL_HOST - valueFrom: - configMapKeyRef: - name: {{ include "common.names.fullname" . }}-env - key: TEMPORAL_HOST - - name: TEMPORAL_WORKER_PORTS - valueFrom: - configMapKeyRef: - name: {{ include "common.names.fullname" . }}-env - key: TEMPORAL_WORKER_PORTS - - name: LOG_LEVEL - value: "{{ .Values.scheduler.log.level }}" - - name: JOB_KUBE_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: SUBMITTER_NUM_THREADS - valueFrom: - configMapKeyRef: - name: {{ include "common.names.fullname" . }}-env - key: SUBMITTER_NUM_THREADS - - name: JOB_MAIN_CONTAINER_CPU_REQUEST - valueFrom: - configMapKeyRef: - name: {{ include "common.names.fullname" . }}-env - key: JOB_MAIN_CONTAINER_CPU_REQUEST - - name: JOB_MAIN_CONTAINER_CPU_LIMIT - valueFrom: - configMapKeyRef: - name: {{ include "common.names.fullname" . }}-env - key: JOB_MAIN_CONTAINER_CPU_LIMIT - - name: JOB_MAIN_CONTAINER_MEMORY_REQUEST - valueFrom: - configMapKeyRef: - name: {{ include "common.names.fullname" . }}-env - key: JOB_MAIN_CONTAINER_MEMORY_REQUEST - - name: JOB_MAIN_CONTAINER_MEMORY_LIMIT - valueFrom: - configMapKeyRef: - name: {{ include "common.names.fullname" . 
}}-env - key: JOB_MAIN_CONTAINER_MEMORY_LIMIT - - name: S3_LOG_BUCKET - valueFrom: - configMapKeyRef: - name: {{ include "common.names.fullname" . }}-env - key: S3_LOG_BUCKET - - name: S3_LOG_BUCKET_REGION - valueFrom: - configMapKeyRef: - name: {{ include "common.names.fullname" . }}-env - key: S3_LOG_BUCKET_REGION - {{- if and .Values.logs.accessKey.existingSecret .Values.logs.accessKey.existingSecretKey }} - - name: AWS_ACCESS_KEY_ID - valueFrom: - secretKeyRef: - name: {{ .Values.logs.accessKey.existingSecret }} - key: {{ .Values.logs.accessKey.existingSecretKey }} - {{- else }} - - name: AWS_ACCESS_KEY_ID - valueFrom: - secretKeyRef: - name: {{ include "common.names.fullname" . }}-secrets - key: AWS_ACCESS_KEY_ID - {{- end }} - {{- if and .Values.logs.secretKey.existingSecret .Values.logs.secretKey.existingSecretKey }} - - name: AWS_SECRET_ACCESS_KEY - valueFrom: - secretKeyRef: - name: {{ .Values.logs.secretKey.existingSecret }} - key: {{ .Values.logs.secretKey.existingSecretKey }} - {{- else }} - - name: AWS_SECRET_ACCESS_KEY - valueFrom: - secretKeyRef: - name: {{ include "common.names.fullname" . }}-secrets - key: AWS_SECRET_ACCESS_KEY - {{- end }} - - name: S3_MINIO_ENDPOINT - valueFrom: - configMapKeyRef: - name: {{ include "common.names.fullname" . }}-env - key: S3_MINIO_ENDPOINT - - name: S3_PATH_STYLE_ACCESS - valueFrom: - configMapKeyRef: - name: {{ include "common.names.fullname" . }}-env - key: S3_PATH_STYLE_ACCESS - - name: GOOGLE_APPLICATION_CREDENTIALS - valueFrom: - configMapKeyRef: - name: {{ include "common.names.fullname" . }}-env - key: GOOGLE_APPLICATION_CREDENTIALS - - name: GCS_LOG_BUCKET - valueFrom: - configMapKeyRef: - name: {{ include "common.names.fullname" . }}-env - key: GCS_LOG_BUCKET - - name: INTERNAL_API_HOST - valueFrom: - configMapKeyRef: - name: {{ include "common.names.fullname" . 
}}-env - key: INTERNAL_API_HOST - {{- if .Values.scheduler.extraEnv }} - {{ .Values.scheduler.extraEnv | toYaml | nindent 8 }} - {{- end }} - {{- if .Values.scheduler.resources }} - resources: {{- toYaml .Values.scheduler.resources | nindent 10 }} - {{- end }} - {{- if .Values.scheduler.containerSecurityContext }} - securityContext: {{- toYaml .Values.scheduler.containerSecurityContext | nindent 10 }} - {{- end }} - {{- if .Values.scheduler.livenessProbe.enabled }} - livenessProbe: - exec: - command: - - /bin/sh - - -ec - - grep -qa airbyte.scheduler.app.SchedulerApp /proc/1/cmdline - initialDelaySeconds: {{ .Values.scheduler.livenessProbe.initialDelaySeconds }} - periodSeconds: {{ .Values.scheduler.livenessProbe.periodSeconds }} - timeoutSeconds: {{ .Values.scheduler.livenessProbe.timeoutSeconds }} - successThreshold: {{ .Values.scheduler.livenessProbe.successThreshold }} - failureThreshold: {{ .Values.scheduler.livenessProbe.failureThreshold }} - {{- end }} - {{- if .Values.scheduler.readinessProbe.enabled }} - readinessProbe: - exec: - command: - - /bin/sh - - -ec - - grep -qa airbyte.scheduler.app.SchedulerApp /proc/1/cmdline - initialDelaySeconds: {{ .Values.scheduler.readinessProbe.initialDelaySeconds }} - periodSeconds: {{ .Values.scheduler.readinessProbe.periodSeconds }} - timeoutSeconds: {{ .Values.scheduler.readinessProbe.timeoutSeconds }} - successThreshold: {{ .Values.scheduler.readinessProbe.successThreshold }} - failureThreshold: {{ .Values.scheduler.readinessProbe.failureThreshold }} - {{- end }} - volumeMounts: - - name: gcs-log-creds-volume - mountPath: /secrets/gcs-log-creds - readOnly: true - {{- if .Values.scheduler.extraVolumeMounts }} - {{ toYaml .Values.scheduler.extraVolumeMounts | nindent 8 }} - {{- end }} - volumes: - - name: gcs-log-creds-volume - secret: - secretName: {{ include "common.names.fullname" . 
}}-gcs-log-creds - {{- if .Values.scheduler.extraVolumes }} -{{ toYaml .Values.scheduler.extraVolumes | nindent 6 }} - {{- end }} diff --git a/docker-compose-cloud.build.yaml b/docker-compose-cloud.build.yaml index 55caf897ecb1..0b0f81eccc0d 100644 --- a/docker-compose-cloud.build.yaml +++ b/docker-compose-cloud.build.yaml @@ -4,13 +4,6 @@ version: "3.7" services: - scheduler: - image: airbyte/scheduler:${VERSION} - build: - dockerfile: Dockerfile - context: airbyte-scheduler/app - labels: - io.airbyte.git-revision: ${GIT_REVISION} worker: image: airbyte/worker:${VERSION} build: diff --git a/docker-compose.build.yaml b/docker-compose.build.yaml index ece749866af6..91c5fe013115 100644 --- a/docker-compose.build.yaml +++ b/docker-compose.build.yaml @@ -22,13 +22,6 @@ services: context: airbyte-db/lib labels: io.airbyte.git-revision: ${GIT_REVISION} - scheduler: - image: airbyte/scheduler:${VERSION} - build: - dockerfile: Dockerfile - context: airbyte-scheduler/app - labels: - io.airbyte.git-revision: ${GIT_REVISION} worker: image: airbyte/worker:${VERSION} build: diff --git a/docker-compose.yaml b/docker-compose.yaml index d35f62962332..39c032b50b8d 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -48,43 +48,6 @@ services: - POSTGRES_USER=${DATABASE_USER} volumes: - db:/var/lib/postgresql/data - scheduler: - image: airbyte/scheduler:${VERSION} - logging: *default-logging - container_name: airbyte-scheduler - restart: unless-stopped - environment: - - AIRBYTE_ROLE=${AIRBYTE_ROLE:-} - - AIRBYTE_VERSION=${VERSION} - - CONFIG_DATABASE_PASSWORD=${CONFIG_DATABASE_PASSWORD:-} - - CONFIG_DATABASE_URL=${CONFIG_DATABASE_URL:-} - - CONFIG_DATABASE_USER=${CONFIG_DATABASE_USER:-} - - CONFIG_ROOT=${CONFIG_ROOT} - - DATABASE_PASSWORD=${DATABASE_PASSWORD} - - DATABASE_URL=${DATABASE_URL} - - DATABASE_USER=${DATABASE_USER} - - INTERNAL_API_HOST=${INTERNAL_API_HOST} - - JOB_MAIN_CONTAINER_CPU_LIMIT=${JOB_MAIN_CONTAINER_CPU_LIMIT} - - JOB_MAIN_CONTAINER_CPU_REQUEST=${JOB_MAIN_CONTAINER_CPU_REQUEST} - - JOB_MAIN_CONTAINER_MEMORY_LIMIT=${JOB_MAIN_CONTAINER_MEMORY_LIMIT} - - JOB_MAIN_CONTAINER_MEMORY_REQUEST=${JOB_MAIN_CONTAINER_MEMORY_REQUEST} - - LOCAL_ROOT=${LOCAL_ROOT} - - LOCAL_DOCKER_MOUNT=${LOCAL_DOCKER_MOUNT} - - LOG_LEVEL=${LOG_LEVEL} - - SECRET_PERSISTENCE=${SECRET_PERSISTENCE} - - SYNC_JOB_MAX_ATTEMPTS=${SYNC_JOB_MAX_ATTEMPTS} - - SYNC_JOB_MAX_TIMEOUT_DAYS=${SYNC_JOB_MAX_TIMEOUT_DAYS} - - SUBMITTER_NUM_THREADS=${SUBMITTER_NUM_THREADS} - - TEMPORAL_HOST=${TEMPORAL_HOST} - - TRACKING_STRATEGY=${TRACKING_STRATEGY} - - WEBAPP_URL=${WEBAPP_URL} - - WORKER_ENVIRONMENT=${WORKER_ENVIRONMENT} - - WORKSPACE_DOCKER_MOUNT=${WORKSPACE_DOCKER_MOUNT} - - WORKSPACE_ROOT=${WORKSPACE_ROOT} - volumes: - - data:${CONFIG_ROOT} - - workspace:${WORKSPACE_ROOT} - - ${LOCAL_ROOT}:${LOCAL_ROOT} worker: image: airbyte/worker:${VERSION} logging: *default-logging diff --git a/docs/deploying-airbyte/on-kubernetes.md b/docs/deploying-airbyte/on-kubernetes.md index e2867f591eb2..08a7ad2e703a 100644 --- a/docs/deploying-airbyte/on-kubernetes.md +++ b/docs/deploying-airbyte/on-kubernetes.md @@ -219,10 +219,6 @@ Check out the [Helm Chart Readme](https://github.com/airbytehq/airbyte/tree/mast `kubectl logs deployments/airbyte-server` to view real-time logs. Logs can also be downloaded as a text file via the Admin tab in the UI. -### View Scheduler or Job Logs - -`kubectl logs deployments/airbyte-scheduler` to view real-time logs. Logs can also be downloaded as a text file via the Admin tab in the UI. 
-
 ### Connector Container Logs
 
 Although all logs can be accessed by viewing the scheduler logs, connector container logs may be easier to understand when isolated by accessing from the Airbyte UI or the [Airbyte API](../api-documentation.md) for a specific job attempt. Connector pods launched by Airbyte will not relay logs directly to Kubernetes logging. You must access these logs through Airbyte.
diff --git a/docs/operator-guides/upgrading-airbyte.md b/docs/operator-guides/upgrading-airbyte.md
index 63c759a9b1eb..0368f1a0a066 100644
--- a/docs/operator-guides/upgrading-airbyte.md
+++ b/docs/operator-guides/upgrading-airbyte.md
@@ -66,7 +66,7 @@ If you are upgrading from (i.e. your current version of Airbyte is) Airbyte vers
 1. In a terminal, on the host where Airbyte is running, turn off Airbyte.
 
    ```bash
-   kubectl delete deployments airbyte-db airbyte-scheduler airbyte-server airbyte-temporal airbyte-webapp --namespace=
+   kubectl delete deployments airbyte-db airbyte-worker airbyte-server airbyte-temporal airbyte-webapp --namespace=
    ```
 2. Upgrade the kube deployment to new version.
diff --git a/docs/troubleshooting/on-deploying.md b/docs/troubleshooting/on-deploying.md
index e9175bae3cda..7d97c3ca7ac5 100644
--- a/docs/troubleshooting/on-deploying.md
+++ b/docs/troubleshooting/on-deploying.md
@@ -41,14 +41,13 @@ Check if all Airbyte containers are running, executing: `docker ps`
 
 ```text
 CONTAINER ID   IMAGE                            COMMAND                  CREATED       STATUS       PORTS                               NAMES
-f45f3cfe1e16   airbyte/scheduler:1.11.1-alpha   "/bin/bash -c './wai…"   2 hours ago   Up 2 hours                                       airbyte-scheduler
 f02fc709b130   airbyte/server:1.11.1-alpha      "/bin/bash -c './wai…"   2 hours ago   Up 2 hours   8000/tcp, [...] :::8001->8001/tcp   airbyte-server
 153b2b322870   airbyte/webapp:1.11.1-alpha      "/docker-entrypoint.…"   2 hours ago   Up 2 hours   :::8000->80/tcp                     airbyte-webapp
 b88d94652268   airbyte/db:1.11.1-alpha          "docker-entrypoint.s…"   2 hours ago   Up 2 hours   5432/tcp                            airbyte-db
 0573681a10e0   temporalio/auto-setup:1.7.0      "/entrypoint.sh /bin…"   2 hours ago   Up 2 hours   6933-6935/tcp, [...]                airbyte-temporal
 ```
 
-You must see 5 containers running. If you are not seeing execute the following steps:
+You must see 4 containers running. If you are not, execute the following steps:
 
 * `docker-compose down -v`
 * `docker-compose up`
@@ -61,11 +60,11 @@ First, let's check the server logs by running `docker logs airbyte-server | grep ERROR`.
 If this command returns any output, please run `docker logs airbyte-server > airbyte-server.log`.
 This command will create a file in the current directory. We advise you to send a message on our \#issues on Slack channel
 
-If you don't have any server errors let's check the scheduler, `docker logs airbyte-scheduler | grep ERROR`.
-  If this command returns any output, please run `docker logs airbyte-scheduler > airbyte-scheduler.log`.
+If you don't have any server errors, let's check the worker: `docker logs airbyte-worker | grep ERROR`.
+  If this command returns any output, please run `docker logs airbyte-worker > airbyte-worker.log`.
 This command will create a file in the current directory. We advise you to send a message on our \#issues on Slack channel
 
-If there is no error printed in both cases, we recommend running: `docker restart airbyte-server airbyte-scheduler`
+If there is no error printed in both cases, we recommend running: `docker restart airbyte-server airbyte-worker`
 Wait a few moments and try to access the interface again.
## `docker.errors.DockerException`: Error while fetching server API version diff --git a/kube/overlays/dev-integration-test/kustomization.yaml b/kube/overlays/dev-integration-test/kustomization.yaml index 04f0725377a9..76c36a1d02e7 100644 --- a/kube/overlays/dev-integration-test/kustomization.yaml +++ b/kube/overlays/dev-integration-test/kustomization.yaml @@ -11,8 +11,6 @@ images: newTag: dev - name: airbyte/bootloader newTag: dev - - name: airbyte/scheduler - newTag: dev - name: airbyte/server newTag: dev - name: airbyte/webapp diff --git a/kube/overlays/dev/kustomization.yaml b/kube/overlays/dev/kustomization.yaml index 5842f9ccb052..d6525f134e5d 100644 --- a/kube/overlays/dev/kustomization.yaml +++ b/kube/overlays/dev/kustomization.yaml @@ -11,8 +11,6 @@ images: newTag: dev - name: airbyte/bootloader newTag: dev - - name: airbyte/scheduler - newTag: dev - name: airbyte/server newTag: dev - name: airbyte/webapp diff --git a/kube/overlays/stable-with-resource-limits/kustomization.yaml b/kube/overlays/stable-with-resource-limits/kustomization.yaml index 685b7898d33a..ee6aaed3fe0b 100644 --- a/kube/overlays/stable-with-resource-limits/kustomization.yaml +++ b/kube/overlays/stable-with-resource-limits/kustomization.yaml @@ -11,8 +11,6 @@ images: newTag: 0.39.7-alpha - name: airbyte/bootloader newTag: 0.39.7-alpha - - name: airbyte/scheduler - newTag: 0.39.7-alpha - name: airbyte/server newTag: 0.39.7-alpha - name: airbyte/webapp diff --git a/kube/overlays/stable-with-resource-limits/set-resource-limits.yaml b/kube/overlays/stable-with-resource-limits/set-resource-limits.yaml index 6218fae28e75..09d50ab9baef 100644 --- a/kube/overlays/stable-with-resource-limits/set-resource-limits.yaml +++ b/kube/overlays/stable-with-resource-limits/set-resource-limits.yaml @@ -14,26 +14,6 @@ spec: --- apiVersion: apps/v1 kind: Deployment -metadata: - name: airbyte-scheduler -spec: - template: - spec: - containers: - - name: airbyte-scheduler-container - env: - - name: CONNECTOR_SPECIFIC_RESOURCE_DEFAULTS_ENABLED - valueFrom: - configMapKeyRef: - name: airbyte-env - key: CONNECTOR_SPECIFIC_RESOURCE_DEFAULTS_ENABLED - resources: - limits: - cpu: 2 - memory: 512Mi ---- -apiVersion: apps/v1 -kind: Deployment metadata: name: airbyte-worker spec: diff --git a/kube/overlays/stable/kustomization.yaml b/kube/overlays/stable/kustomization.yaml index 48a1fa6fca25..2bc77a070688 100644 --- a/kube/overlays/stable/kustomization.yaml +++ b/kube/overlays/stable/kustomization.yaml @@ -11,8 +11,6 @@ images: newTag: 0.39.7-alpha - name: airbyte/bootloader newTag: 0.39.7-alpha - - name: airbyte/scheduler - newTag: 0.39.7-alpha - name: airbyte/server newTag: 0.39.7-alpha - name: airbyte/webapp diff --git a/kube/resources/scheduler-service-account.yaml b/kube/resources/admin-service-account.yaml similarity index 100% rename from kube/resources/scheduler-service-account.yaml rename to kube/resources/admin-service-account.yaml diff --git a/kube/resources/kustomization.yaml b/kube/resources/kustomization.yaml index b00b02668c65..8752d16cdc8c 100644 --- a/kube/resources/kustomization.yaml +++ b/kube/resources/kustomization.yaml @@ -6,9 +6,8 @@ resources: - bootloader.yaml - db.yaml - pod-sweeper.yaml - - scheduler.yaml - secret-gcs-log-creds.yaml - - scheduler-service-account.yaml + - admin-service-account.yaml - server.yaml - temporal.yaml - volume-configs.yaml diff --git a/kube/resources/scheduler.yaml b/kube/resources/scheduler.yaml deleted file mode 100644 index 82457cafd94a..000000000000 --- a/kube/resources/scheduler.yaml +++ 
/dev/null @@ -1,177 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: airbyte-scheduler -spec: - replicas: 1 - selector: - matchLabels: - airbyte: scheduler - template: - metadata: - labels: - airbyte: scheduler - spec: - containers: - - name: airbyte-scheduler-container - image: airbyte/scheduler - env: - - name: AIRBYTE_VERSION - valueFrom: - configMapKeyRef: - name: airbyte-env - key: AIRBYTE_VERSION - - name: CONFIG_ROOT - valueFrom: - configMapKeyRef: - name: airbyte-env - key: CONFIG_ROOT - - name: DATABASE_HOST - valueFrom: - configMapKeyRef: - name: airbyte-env - key: DATABASE_HOST - - name: DATABASE_PORT - valueFrom: - configMapKeyRef: - name: airbyte-env - key: DATABASE_PORT - - name: DATABASE_PASSWORD - valueFrom: - secretKeyRef: - name: airbyte-secrets - key: DATABASE_PASSWORD - - name: DATABASE_URL - valueFrom: - configMapKeyRef: - name: airbyte-env - key: DATABASE_URL - - name: DATABASE_USER - valueFrom: - secretKeyRef: - name: airbyte-secrets - key: DATABASE_USER - - name: TRACKING_STRATEGY - valueFrom: - configMapKeyRef: - name: airbyte-env - key: TRACKING_STRATEGY - - name: WORKSPACE_DOCKER_MOUNT - value: workspace - - name: WORKSPACE_ROOT - valueFrom: - configMapKeyRef: - name: airbyte-env - key: WORKSPACE_ROOT - - name: WORKER_ENVIRONMENT - valueFrom: - configMapKeyRef: - name: airbyte-env - key: WORKER_ENVIRONMENT - - name: LOCAL_ROOT - valueFrom: - configMapKeyRef: - name: airbyte-env - key: LOCAL_ROOT - - name: WEBAPP_URL - valueFrom: - configMapKeyRef: - name: airbyte-env - key: WEBAPP_URL - - name: TEMPORAL_HOST - valueFrom: - configMapKeyRef: - name: airbyte-env - key: TEMPORAL_HOST - - name: TEMPORAL_WORKER_PORTS - valueFrom: - configMapKeyRef: - name: airbyte-env - key: TEMPORAL_WORKER_PORTS - - name: LOG_LEVEL - valueFrom: - configMapKeyRef: - name: airbyte-env - key: LOG_LEVEL - - name: JOB_KUBE_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: SUBMITTER_NUM_THREADS - valueFrom: - configMapKeyRef: - name: airbyte-env - key: SUBMITTER_NUM_THREADS - - name: JOB_MAIN_CONTAINER_CPU_REQUEST - valueFrom: - configMapKeyRef: - name: airbyte-env - key: JOB_MAIN_CONTAINER_CPU_REQUEST - - name: JOB_MAIN_CONTAINER_CPU_LIMIT - valueFrom: - configMapKeyRef: - name: airbyte-env - key: JOB_MAIN_CONTAINER_CPU_LIMIT - - name: JOB_MAIN_CONTAINER_MEMORY_REQUEST - valueFrom: - configMapKeyRef: - name: airbyte-env - key: JOB_MAIN_CONTAINER_MEMORY_REQUEST - - name: JOB_MAIN_CONTAINER_MEMORY_LIMIT - valueFrom: - configMapKeyRef: - name: airbyte-env - key: JOB_MAIN_CONTAINER_MEMORY_LIMIT - - name: S3_LOG_BUCKET - valueFrom: - configMapKeyRef: - name: airbyte-env - key: S3_LOG_BUCKET - - name: S3_LOG_BUCKET_REGION - valueFrom: - configMapKeyRef: - name: airbyte-env - key: S3_LOG_BUCKET_REGION - - name: AWS_ACCESS_KEY_ID - valueFrom: - secretKeyRef: - name: airbyte-secrets - key: AWS_ACCESS_KEY_ID - - name: AWS_SECRET_ACCESS_KEY - valueFrom: - secretKeyRef: - name: airbyte-secrets - key: AWS_SECRET_ACCESS_KEY - - name: S3_MINIO_ENDPOINT - valueFrom: - configMapKeyRef: - name: airbyte-env - key: S3_MINIO_ENDPOINT - - name: S3_PATH_STYLE_ACCESS - valueFrom: - configMapKeyRef: - name: airbyte-env - key: S3_PATH_STYLE_ACCESS - - name: GOOGLE_APPLICATION_CREDENTIALS - valueFrom: - secretKeyRef: - name: airbyte-secrets - key: GOOGLE_APPLICATION_CREDENTIALS - - name: GCS_LOG_BUCKET - valueFrom: - configMapKeyRef: - name: airbyte-env - key: GCS_LOG_BUCKET - - name: INTERNAL_API_HOST - valueFrom: - configMapKeyRef: - name: airbyte-env - key: 
INTERNAL_API_HOST - volumeMounts: - - name: gcs-log-creds-volume - mountPath: /secrets/gcs-log-creds - readOnly: true - volumes: - - name: gcs-log-creds-volume - secret: - secretName: gcs-log-creds diff --git a/settings.gradle b/settings.gradle index daf40b71357b..42af820e5a64 100644 --- a/settings.gradle +++ b/settings.gradle @@ -71,7 +71,6 @@ if (!System.getenv().containsKey("SUB_BUILD") || System.getenv().get("SUB_BUILD" include ':airbyte-config:specs' include ':airbyte-container-orchestrator' include ':airbyte-metrics:reporter' - include ':airbyte-scheduler:app' include ':airbyte-scheduler:client' include ':airbyte-server' include ':airbyte-temporal' diff --git a/tools/bin/acceptance_test_kube.sh b/tools/bin/acceptance_test_kube.sh index a19122753156..f6638327ec01 100755 --- a/tools/bin/acceptance_test_kube.sh +++ b/tools/bin/acceptance_test_kube.sh @@ -11,7 +11,6 @@ assert_root if [ -n "$CI" ]; then echo "Loading images into KIND..." kind load docker-image airbyte/server:dev --name chart-testing & - kind load docker-image airbyte/scheduler:dev --name chart-testing & kind load docker-image airbyte/webapp:dev --name chart-testing & kind load docker-image airbyte/worker:dev --name chart-testing & kind load docker-image airbyte/db:dev --name chart-testing & @@ -25,9 +24,8 @@ echo "Starting app..." echo "Applying dev-integration-test manifests to kubernetes..." kubectl apply -k kube/overlays/dev-integration-test -echo "Waiting for server and scheduler to be ready..." +echo "Waiting for server to be ready..." kubectl wait --for=condition=Available deployment/airbyte-server --timeout=300s || (kubectl describe pods && exit 1) -kubectl wait --for=condition=Available deployment/airbyte-scheduler --timeout=300s || (kubectl describe pods && exit 1) echo "Listing nodes scheduled for pods..." 
kubectl describe pods | grep "Name\|Node" @@ -38,7 +36,6 @@ sleep 120s if [ -n "$CI" ]; then bootloader_logs () { kubectl logs pod/airbyte-bootloader > /tmp/kubernetes_logs/bootloader.txt; } server_logs () { kubectl logs deployment.apps/airbyte-server > /tmp/kubernetes_logs/server.txt; } - scheduler_logs () { kubectl logs deployment.apps/airbyte-scheduler > /tmp/kubernetes_logs/scheduler.txt; } pod_sweeper_logs () { kubectl logs deployment.apps/airbyte-pod-sweeper > /tmp/kubernetes_logs/pod_sweeper.txt; } worker_logs () { kubectl logs deployment.apps/airbyte-worker > /tmp/kubernetes_logs/worker.txt; } db_logs () { kubectl logs deployment.apps/airbyte-db > /tmp/kubernetes_logs/db.txt; } @@ -48,7 +45,6 @@ if [ -n "$CI" ]; then write_all_logs () { bootloader_logs; server_logs; - scheduler_logs; worker_logs; db_logs; temporal_logs; From ab3df34a738cfd232ee48a3f625815bbece94331 Mon Sep 17 00:00:00 2001 From: lmossman Date: Wed, 1 Jun 2022 18:16:07 -0700 Subject: [PATCH 03/14] format --- .../java/io/airbyte/test/acceptance/AcceptanceTests.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/AcceptanceTests.java b/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/AcceptanceTests.java index d8947ac499aa..51a792063820 100644 --- a/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/AcceptanceTests.java +++ b/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/AcceptanceTests.java @@ -72,8 +72,6 @@ import io.airbyte.api.client.model.generated.SourceIdRequestBody; import io.airbyte.api.client.model.generated.SourceRead; import io.airbyte.api.client.model.generated.SyncMode; -import io.airbyte.commons.features.EnvVariableFeatureFlags; -import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.lang.MoreBooleans; import io.airbyte.commons.resources.MoreResources; From ce2b6204f36d277adaa1d442915ec5af3ef5777f Mon Sep 17 00:00:00 2001 From: lmossman Date: Wed, 1 Jun 2022 18:20:03 -0700 Subject: [PATCH 04/14] remove 'v2' from github actions --- .github/workflows/gradle.yml | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml index ed2cb26e14b1..3566372ccbfc 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -521,11 +521,11 @@ jobs: label: ${{ needs.start-platform-build-runner.outputs.label }} ec2-instance-id: ${{ needs.start-platform-build-runner.outputs.ec2-instance-id }} - ## Kube Acceptance Tests (with scheduler v2 - both temporal changes and container orchestrator) + ## Kube Acceptance Tests # Docker acceptance tests run as part of the build job. # In case of self-hosted EC2 errors, remove this block. - start-kube-acceptance-test-runner-v2: - name: "Platform: Start Scheduler V2 Kube Acceptance Test Runner" + start-kube-acceptance-test-runner: + name: "Platform: Start Kube Acceptance Test Runner" needs: - changes - find_valid_pat @@ -548,11 +548,11 @@ jobs: aws-access-key-id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }} github-token: ${{ needs.find_valid_pat.outputs.pat }} - kube-acceptance-test-v2: - name: "Platform: Acceptance Tests (Kube v2)" + kube-acceptance-test: + name: "Platform: Acceptance Tests (Kube)" # In case of self-hosted EC2 errors, removed the `needs` line and switch back to running on ubuntu-latest. 
- needs: start-kube-acceptance-test-runner-v2 # required to start the main job when the runner is ready - runs-on: ${{ needs.start-kube-acceptance-test-runner-v2.outputs.label }} # run the job on the newly created runner + needs: start-kube-acceptance-test-runner # required to start the main job when the runner is ready + runs-on: ${{ needs.start-kube-acceptance-test-runner.outputs.label }} # run the job on the newly created runner environment: more-secrets timeout-minutes: 90 steps: @@ -631,17 +631,17 @@ jobs: name: Kubernetes Logs path: /tmp/kubernetes_logs/* # In case of self-hosted EC2 errors, remove this block. - stop-kube-acceptance-test-runner-v2: + stop-kube-acceptance-test-runner: name: "Platform: Stop Kube Acceptance Test EC2 Runner" timeout-minutes: 10 needs: - - start-kube-acceptance-test-runner-v2 # required to get output from the start-runner job - - kube-acceptance-test-v2 # required to wait when the main job is done + - start-kube-acceptance-test-runner # required to get output from the start-runner job + - kube-acceptance-test # required to wait when the main job is done - find_valid_pat runs-on: ubuntu-latest # Always is required to stop the runner even if the previous job has errors. However always() runs even if the previous step is skipped. # Thus, we check for skipped here. - if: ${{ always() && needs.start-kube-acceptance-test-runner-v2.result != 'skipped'}} + if: ${{ always() && needs.start-kube-acceptance-test-runner.result != 'skipped'}} steps: - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v1 @@ -654,8 +654,8 @@ jobs: with: mode: stop github-token: ${{ needs.find_valid_pat.outputs.pat }} - label: ${{ needs.start-kube-acceptance-test-runner-v2.outputs.label }} - ec2-instance-id: ${{ needs.start-kube-acceptance-test-runner-v2.outputs.ec2-instance-id }} + label: ${{ needs.start-kube-acceptance-test-runner.outputs.label }} + ec2-instance-id: ${{ needs.start-kube-acceptance-test-runner.outputs.ec2-instance-id }} notify-failure-slack-channel: name: "Notify Slack Channel on Build Failures" @@ -665,7 +665,7 @@ jobs: - frontend-build - octavia-cli-build - platform-build - - kube-acceptance-test-v2 + - kube-acceptance-test if: ${{ failure() && github.ref == 'refs/heads/master' }} steps: - name: Publish to OSS Build Failure Slack Channel @@ -689,7 +689,7 @@ jobs: - frontend-build - octavia-cli-build - platform-build - - kube-acceptance-test-v2 + - kube-acceptance-test if: success() steps: - name: Get Previous Workflow Status From cce6eb1cd9a14f9c8ab9bd453e21b59c5583630e Mon Sep 17 00:00:00 2001 From: lmossman Date: Wed, 1 Jun 2022 20:58:58 -0700 Subject: [PATCH 05/14] add back scheduler in delete deployment command --- docs/operator-guides/upgrading-airbyte.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/operator-guides/upgrading-airbyte.md b/docs/operator-guides/upgrading-airbyte.md index 0368f1a0a066..b0d41de5c17d 100644 --- a/docs/operator-guides/upgrading-airbyte.md +++ b/docs/operator-guides/upgrading-airbyte.md @@ -66,7 +66,7 @@ If you are upgrading from (i.e. your current version of Airbyte is) Airbyte vers 1. In a terminal, on the host where Airbyte is running, turn off Airbyte. ```bash - kubectl delete deployments airbyte-db airbyte-worker airbyte-server airbyte-temporal airbyte-webapp --namespace= + kubectl delete deployments airbyte-db airbyte-scheduler airbyte-worker airbyte-server airbyte-temporal airbyte-webapp --namespace= ``` 2. Upgrade the kube deployment to new version. 
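Note on the hunk above: `airbyte-scheduler` is restored to the teardown command because installs created before this patch series still run that deployment, while fresh installs no longer do. Below is a minimal sketch of a defensive variant that works in both cases; it assumes `kubectl` access to the cluster, and the `NAMESPACE` variable is a placeholder for illustration, not a value defined in the docs above.

```bash
#!/usr/bin/env bash
# Minimal sketch: delete only the Airbyte deployments that actually exist, so
# the same command works on installs created before and after the scheduler
# removal. NAMESPACE is a placeholder; substitute your Airbyte namespace.
NAMESPACE="${NAMESPACE:-default}"

for d in airbyte-db airbyte-scheduler airbyte-worker airbyte-server airbyte-temporal airbyte-webapp; do
  if kubectl get deployment "$d" --namespace="$NAMESPACE" >/dev/null 2>&1; then
    kubectl delete deployment "$d" --namespace="$NAMESPACE"
  fi
done
```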
From 70faeef1a1ca70ca326016519bce17fec7cf48f1 Mon Sep 17 00:00:00 2001 From: lmossman Date: Thu, 2 Jun 2022 15:34:05 -0700 Subject: [PATCH 06/14] remove scheduler parameters from helm chart values --- charts/airbyte/values.yaml | 125 ------------------------------------- 1 file changed, 125 deletions(-) diff --git a/charts/airbyte/values.yaml b/charts/airbyte/values.yaml index 4952697a9c73..e688c901e7f5 100644 --- a/charts/airbyte/values.yaml +++ b/charts/airbyte/values.yaml @@ -197,131 +197,6 @@ webapp: ## extraVolumes: [] -## @section Scheduler Parameters - -scheduler: - ## @param scheduler.replicaCount Number of scheduler replicas - replicaCount: 1 - - ## @param scheduler.image.repository The repository to use for the airbyte scheduler image. - ## @param scheduler.image.pullPolicy the pull policy to use for the airbyte scheduler image - ## @param scheduler.image.tag The airbyte scheduler image tag. Defaults to the chart's AppVersion - image: - repository: airbyte/scheduler - pullPolicy: IfNotPresent - tag: 0.39.7-alpha - - ## @param scheduler.podAnnotations [object] Add extra annotations to the scheduler pod - ## - podAnnotations: {} - - ## @param scheduler.containerSecurityContext Security context for the container - ## Examples: - ## containerSecurityContext: - ## runAsNonRoot: true - ## runAsUser: 1000 - ## readOnlyRootFilesystem: true - containerSecurityContext: {} - - ## Configure extra options for the scheduler containers' liveness and readiness probes - ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes - ## @param scheduler.livenessProbe.enabled Enable livenessProbe on the scheduler - ## @param scheduler.livenessProbe.initialDelaySeconds Initial delay seconds for livenessProbe - ## @param scheduler.livenessProbe.periodSeconds Period seconds for livenessProbe - ## @param scheduler.livenessProbe.timeoutSeconds Timeout seconds for livenessProbe - ## @param scheduler.livenessProbe.failureThreshold Failure threshold for livenessProbe - ## @param scheduler.livenessProbe.successThreshold Success threshold for livenessProbe - ## - livenessProbe: - enabled: true - initialDelaySeconds: 5 - periodSeconds: 30 - timeoutSeconds: 1 - failureThreshold: 3 - successThreshold: 1 - - ## @param scheduler.readinessProbe.enabled Enable readinessProbe on the scheduler - ## @param scheduler.readinessProbe.initialDelaySeconds Initial delay seconds for readinessProbe - ## @param scheduler.readinessProbe.periodSeconds Period seconds for readinessProbe - ## @param scheduler.readinessProbe.timeoutSeconds Timeout seconds for readinessProbe - ## @param scheduler.readinessProbe.failureThreshold Failure threshold for readinessProbe - ## @param scheduler.readinessProbe.successThreshold Success threshold for readinessProbe - ## - readinessProbe: - enabled: true - initialDelaySeconds: 5 - periodSeconds: 30 - timeoutSeconds: 1 - failureThreshold: 3 - successThreshold: 1 - - ## Scheduler resource requests and limits - ## ref: http://kubernetes.io/docs/user-guide/compute-resources/ - ## We usually recommend not to specify default resources and to leave this as a conscious - ## choice for the user. This also increases chances charts run on environments with little - ## resources, such as Minikube. If you do want to specify resources, uncomment the following - ## lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
- ## @param scheduler.resources.limits [object] The resources limits for the scheduler container - ## @param scheduler.resources.requests [object] The requested resources for the scheduler container - resources: - ## Example: - ## limits: - ## cpu: 200m - ## memory: 1Gi - limits: {} - ## Examples: - ## requests: - ## memory: 256Mi - ## cpu: 250m - requests: {} - - ## @param scheduler.nodeSelector [object] Node labels for pod assignment - ## Ref: https://kubernetes.io/docs/user-guide/node-selection/ - ## - nodeSelector: {} - - ## @param scheduler.tolerations [array] Tolerations for scheduler pod assignment. - ## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ - ## - tolerations: [] - - ## @param scheduler.affinity [object] Affinity and anti-affinity for scheduler pod assignment. - ## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity - ## - affinity: {} - - ## @param scheduler.log.level The log level to log at. - log: - level: "INFO" - - ## @param scheduler.extraEnv [array] Additional env vars for scheduler pod(s). - ## Example: - ## - ## extraEnv: - ## - name: SAMPLE_ENV_VAR - ## value: "key=sample-value" - extraEnv: [] - - ## @param scheduler.extraVolumeMounts [array] Additional volumeMounts for scheduler container(s). - ## Examples: - ## extraVolumeMounts: - ## - name: tmpdir - ## mountPath: /tmp - ## - mountPath: /workspace - ## name: workspace - ## - extraVolumeMounts: [] - - ## @param scheduler.extraVolumes [array] Additional volumes for scheduler pod(s). - ## Examples: - ## extraVolumes: - ## - name: tmpdir - ## emptyDir: {} - ## - name: workspace - ## emptyDir: {} - ## - extraVolumes: [] - ## @section Pod Sweeper parameters podSweeper: From 62919d41ab0f6e6e3c62edc830c15c72ae728b0c Mon Sep 17 00:00:00 2001 From: lmossman Date: Thu, 2 Jun 2022 17:48:12 -0700 Subject: [PATCH 07/14] add back job cleaner + test and add comment --- .../scheduler/persistence/JobCleaner.java | 155 ++++++++++++++++++ .../scheduler/persistence/JobCleanerTest.java | 143 ++++++++++++++++ 2 files changed, 298 insertions(+) create mode 100644 airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobCleaner.java create mode 100644 airbyte-scheduler/persistence/src/test/java/io/airbyte/scheduler/persistence/JobCleanerTest.java diff --git a/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobCleaner.java b/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobCleaner.java new file mode 100644 index 000000000000..c50aaa464f50 --- /dev/null +++ b/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobCleaner.java @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+ */
+
+package io.airbyte.scheduler.persistence;
+
+import com.google.common.collect.Sets;
+import io.airbyte.config.WorkspaceRetentionConfig;
+import io.airbyte.scheduler.models.Job;
+import io.airbyte.scheduler.models.JobStatus;
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.nio.file.attribute.FileTime;
+import java.time.LocalDateTime;
+import java.time.OffsetDateTime;
+import java.util.Date;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.stream.Collectors;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.filefilter.AgeFileFilter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * NOTE: This class is currently unused, as of the deletion of the old airbyte-scheduler
+ * application. This class is being kept around as it may be useful in the future when job sweeping
+ * is added to the temporal scheduler. Go to the following permalink to see how this class was used
+ * by the SchedulerApp before that was removed:
+ * https://github.com/airbytehq/airbyte/blob/88390f24ea0490d979dc9a96623539279a2d0eb0/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java#L270
+ */
+
+/**
+ * The job cleaner is responsible for limiting the retention of files in the workspace root. It does
+ * this in two ways. 1. It cleans out all files and directories that are older than the maximum
+ * retention date. 2. It cleans out the oldest files before the minimum retention date until it is
+ * within the max workspace size.
+ */
+public class JobCleaner implements Runnable {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(JobCleaner.class);
+
+  private final Path workspaceRoot;
+  private final JobPersistence jobPersistence;
+
+  private final WorkspaceRetentionConfig config;
+
+  public JobCleaner(final WorkspaceRetentionConfig config,
+                    final Path workspaceRoot,
+                    final JobPersistence jobPersistence) {
+    this.config = config;
+    this.workspaceRoot = workspaceRoot;
+    this.jobPersistence = jobPersistence;
+  }
+
+  @Override
+  public void run() {
+    try {
+      deleteOldFiles();
+      deleteOnSize();
+    } catch (final IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  private void deleteOldFiles() throws IOException {
+    final Date oldestAllowed = getDateFromDaysAgo(config.getMaxDays());
+
+    Files.walk(workspaceRoot)
+        .map(Path::toFile)
+        .filter(f -> new AgeFileFilter(oldestAllowed).accept(f))
+        .forEach(file -> {
+          LOGGER.info("Deleting old file: " + file.toString());
+          FileUtils.deleteQuietly(file);
+
+          final File parentDir = file.getParentFile();
+          if (parentDir.isDirectory() && parentDir.listFiles().length == 0) {
+            FileUtils.deleteQuietly(parentDir);
+          }
+        });
+  }
+
+  private void deleteOnSize() throws IOException {
+    final Set<String> nonTerminalJobIds = new HashSet<>();
+    final Sets.SetView<JobStatus> nonTerminalStatuses = Sets.difference(Set.of(JobStatus.values()), JobStatus.TERMINAL_STATUSES);
+
+    for (final JobStatus nonTerminalStatus : nonTerminalStatuses) {
+      final Set<String> jobIds = jobPersistence.listJobsWithStatus(nonTerminalStatus)
+          .stream()
+          .map(Job::getId)
+          .map(String::valueOf)
+          .collect(Collectors.toSet());
+
+      nonTerminalJobIds.addAll(jobIds);
+    }
+
+    final Date youngestAllowed = getDateFromDaysAgo(config.getMinDays());
+
+    final long workspaceBytes = FileUtils.sizeOfDirectory(workspaceRoot.toFile());
+    final AtomicLong deletedBytes = new AtomicLong(0);
+    final AgeFileFilter ageFilter = new AgeFileFilter(youngestAllowed);
+    Files.walk(workspaceRoot)
+        .map(Path::toFile)
+        .filter(f -> {
+          Path relativePath = workspaceRoot.relativize(f.toPath());
+
+          // if the directory is ID/something instead of just ID, get just the ID
+          if (relativePath.getParent() != null) {
+            relativePath = workspaceRoot.relativize(f.toPath()).getParent();
+          }
+
+          if (!relativePath.toString().equals("")) {
+            return !nonTerminalJobIds.contains(relativePath.toString());
+          } else {
+            return true;
+          }
+        })
+        .filter(ageFilter::accept)
+        .sorted((o1, o2) -> {
+          final FileTime ft1 = getFileTime(o1);
+          final FileTime ft2 = getFileTime(o2);
+          return ft1.compareTo(ft2);
+        })
+        .forEach(fileToDelete -> {
+          if (workspaceBytes - deletedBytes.get() > config.getMaxSizeMb() * 1024 * 1024) {
+            final long sizeToDelete = fileToDelete.length();
+            deletedBytes.addAndGet(sizeToDelete);
+            LOGGER.info("Deleting: " + fileToDelete.toString());
+            FileUtils.deleteQuietly(fileToDelete);
+
+            final File parentDir = fileToDelete.getParentFile();
+            if (parentDir.isDirectory() && parentDir.listFiles().length == 0) {
+              FileUtils.deleteQuietly(parentDir);
+            }
+          }
+        });
+  }
+
+  protected static Date getDateFromDaysAgo(final long daysAgo) {
+    return Date.from(LocalDateTime.now().minusDays(daysAgo).toInstant(OffsetDateTime.now().getOffset()));
+  }
+
+  private static FileTime getFileTime(final File file) {
+    try {
+      return Files.readAttributes(file.toPath(), BasicFileAttributes.class).creationTime();
+    } catch (final IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+}
diff --git a/airbyte-scheduler/persistence/src/test/java/io/airbyte/scheduler/persistence/JobCleanerTest.java b/airbyte-scheduler/persistence/src/test/java/io/airbyte/scheduler/persistence/JobCleanerTest.java
new file mode 100644
index 000000000000..5b582f62083c
--- /dev/null
+++ b/airbyte-scheduler/persistence/src/test/java/io/airbyte/scheduler/persistence/JobCleanerTest.java
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2022 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.scheduler.persistence;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import io.airbyte.config.WorkspaceRetentionConfig;
+import io.airbyte.scheduler.models.Job;
+import io.airbyte.scheduler.models.JobStatus;
+import io.airbyte.scheduler.persistence.JobPersistence;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+class JobCleanerTest {
+
+  @TempDir
+  Path folder;
+
+  @Test
+  public void testNotDeletingFilesInMinimum() throws IOException {
+    createFile(folder.resolve("1"), "A", 1, 10);
+
+    final JobPersistence jobPersistence = mock(JobPersistence.class);
+
+    final JobCleaner jobCleaner = new JobCleaner(
+        new WorkspaceRetentionConfig(20, 30, 0),
+        folder,
+        jobPersistence);
+
+    final Set<String> before = listFiles(folder);
+    jobCleaner.run();
+    final Set<String> after = listFiles(folder);
+
+    assertFalse(before.isEmpty());
+    assertEquals(before, after);
+  }
+
+  @Test
+  public void testDeletingOldFiles() throws IOException {
+    createFile(folder.resolve("1"), "A", 1, 100);
+
+    final JobPersistence jobPersistence = mock(JobPersistence.class);
+
+    final JobCleaner jobCleaner = new JobCleaner(
+        new WorkspaceRetentionConfig(20, 30, 0),
+        folder,
+        jobPersistence);
+
+    final Set<String> before = listFiles(folder);
+    jobCleaner.run();
+    final Set<String> after = listFiles(folder);
+
+    final Set<String> expected = Set.of("");
+
+    assertFalse(before.isEmpty());
+    assertEquals(expected, after);
+  }
+
+  @Test
+  public void testDeletingLargeFiles() throws IOException {
+    createFile(folder.resolve("1"), "A", 1, 10);
+    createFile(folder.resolve("1"), "B", 1, 10);
+    createFile(folder.resolve("1"), "C", 1, 10);
+    createFile(folder.resolve("2"), "D", 1, 18);
+    createFile(folder.resolve("2"), "E", 1, 19);
+    createFile(folder.resolve("2"), "F", 1, 20);
+
+    final JobPersistence jobPersistence = mock(JobPersistence.class);
+
+    final JobCleaner jobCleaner = new JobCleaner(
+        new WorkspaceRetentionConfig(1, 30, 4),
+        folder,
+        jobPersistence);
+
+    jobCleaner.run();
+    final Set<String> after = listFiles(folder);
+    final Set<String> expected = Set.of("", "/1", "/1/A", "/1/B", "/1/C", "/2", "/2/D");
+
+    assertEquals(expected, after);
+  }
+
+  @Test
+  public void testNotDeletingRunning() throws IOException {
+    createFile(folder.resolve("1"), "A", 1, 10);
+    createFile(folder.resolve("1"), "B", 1, 10);
+    createFile(folder.resolve("1"), "C", 1, 10);
+    createFile(folder.resolve("2"), "D", 1, 18);
+    createFile(folder.resolve("2"), "E", 1, 19);
+    createFile(folder.resolve("2"), "F", 1, 20);
+
+    final JobPersistence jobPersistence = mock(JobPersistence.class);
+    final Job job2 = mock(Job.class);
+    when(job2.getId()).thenReturn(2L);
+    when(jobPersistence.listJobsWithStatus(JobStatus.RUNNING)).thenReturn(List.of(job2));
+
+    final JobCleaner jobCleaner = new JobCleaner(
+        new WorkspaceRetentionConfig(1, 30, 0),
+        folder,
+        jobPersistence);
+
+    jobCleaner.run();
+    final Set<String> after = listFiles(folder);
+    final Set<String> expected = Set.of("", "/2", "/2/D", "/2/E", "/2/F");
+
+    assertEquals(expected, after);
+  }
+
+  private void createFile(final Path subdirectory, final String filename, final int sizeMb, final int daysAgo) throws IOException {
+    final long lastModified = JobCleaner.getDateFromDaysAgo(daysAgo).getTime();
+    final File subdirFile = subdirectory.toFile();
+    if (!subdirFile.exists()) {
+      subdirFile.mkdir();
+      subdirFile.setLastModified(lastModified);
+    }
+
+    final File file = subdirectory.resolve(filename).toFile();
+    file.createNewFile();
+
+    final RandomAccessFile raf = new RandomAccessFile(file, "rw");
+    raf.setLength(sizeMb * 1024 * 1024);
+    raf.close();
+
+    file.setLastModified(lastModified);
+  }
+
+  private Set<String> listFiles(final Path dir) throws IOException {
+    return Files.walk(dir).map(Path::toString).map(x -> x.replace(folder.toString(), "")).collect(Collectors.toSet());
+  }
+
+}
From 4f4124aa68d044aa48b02bd37981f00acc3e92c5 Mon Sep 17 00:00:00 2001
From: lmossman
Date: Thu, 2 Jun 2022 17:56:09 -0700
Subject: [PATCH 08/14] remove now-unused env vars from code and docs

---
 .env                                          |  4 ----
 .../main/java/io/airbyte/config/Configs.java  | 12 ----------
 .../java/io/airbyte/config/EnvConfigs.java    | 22 -------------------
 charts/airbyte/templates/env-configmap.yaml   |  1 -
 .../airbyte/templates/worker/deployment.yaml  |  5 -----
 docs/deploying-airbyte/on-kubernetes.md       |  2 +-
 docs/operator-guides/configuring-airbyte.md   |  8 +------
 kube/overlays/dev-integration-test/.env       |  3 ---
 kube/overlays/dev/.env                        |  3 ---
 .../overlays/stable-with-resource-limits/.env |  3 ---
 kube/overlays/stable/.env                     |  3 ---
 kube/resources/worker.yaml                    |  5 -----
 12 files changed, 2 insertions(+), 69 deletions(-)

diff --git a/.env b/.env
index 23f1ed43a924..68b858466b67 100644
--- a/.env
+++ b/.env
@@ -77,10 +77,6 @@ SENTRY_DSN="https://d4b03de0c4574c78999b8d58e55243dc@o1009025.ingest.sentry.io/6
 ### APPLICATIONS ###
 
-# Scheduler #
-# Relevant to scaling.
-SUBMITTER_NUM_THREADS=10
-
 # Worker #
 # Relevant to scaling.
 MAX_SYNC_WORKERS=5
diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java b/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java
index dd19fc10bd43..69e78a567657 100644
--- a/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java
+++ b/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java
@@ -449,18 +449,6 @@ public interface Configs {
    */
   Set<Integer> getTemporalWorkerPorts();
 
-  // Scheduler
-  /**
-   * Define how and how often the Scheduler sweeps its local disk for old configs. Multiple variables
-   * are involved here. Please see {@link WorkspaceRetentionConfig} for more info.
-   */
-  WorkspaceRetentionConfig getWorkspaceRetentionConfig();
-
-  /**
-   * Define the maximum number of concurrent jobs the Scheduler schedules. Defaults to 5.
-   */
-  String getSubmitterNumThreads();
-
   // Container Orchestrator
   /**
    * Define if Airbyte should use the container orchestrator. Internal-use only.
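The JobCleaner retained in the previous patch is built from the same retention values that this patch stops reading from the environment. Below is a minimal sketch of how it could be re-wired once job sweeping is added to the temporal scheduler, per the NOTE in JobCleaner's javadoc. The hard-coded values mirror the removed EnvConfigs defaults (1 day minimum, 60 days maximum, 5000 MB cap; see the EnvConfigs hunk just below), while the single-threaded executor and the one-hour cadence are illustrative assumptions, not values taken from this patch series.

```java
// Sketch only: re-wiring the retained JobCleaner outside the deleted SchedulerApp.
// Retention values mirror the old EnvConfigs defaults removed in this patch;
// the executor choice and one-hour cadence are assumptions for illustration.
import io.airbyte.config.WorkspaceRetentionConfig;
import io.airbyte.scheduler.persistence.JobCleaner;
import io.airbyte.scheduler.persistence.JobPersistence;
import java.nio.file.Path;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

final class JobCleanerWiring {

  private JobCleanerWiring() {}

  static ScheduledExecutorService startJobCleaner(final Path workspaceRoot, final JobPersistence jobPersistence) {
    // Former defaults: 1 day minimum retention, 60 days maximum, 5000 MB workspace cap.
    final WorkspaceRetentionConfig retention = new WorkspaceRetentionConfig(1, 60, 5000);
    final JobCleaner jobCleaner = new JobCleaner(retention, workspaceRoot, jobPersistence);

    final ScheduledExecutorService executor = Executors.newSingleThreadScheduledExecutor();
    // JobCleaner implements Runnable, so it can be handed to the executor directly.
    executor.scheduleWithFixedDelay(jobCleaner, 0, 1, TimeUnit.HOURS);
    return executor;
  }

}
```

Because `JobCleaner` implements `Runnable`, any scheduling mechanism — a plain executor as above or a Temporal cron workflow — could drive it the same way.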
diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java b/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java index cfc7ebe0bd40..74910908fc13 100644 --- a/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java +++ b/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java @@ -70,9 +70,6 @@ public class EnvConfigs implements Configs { public static final String SYNC_JOB_MAX_ATTEMPTS = "SYNC_JOB_MAX_ATTEMPTS"; public static final String SYNC_JOB_MAX_TIMEOUT_DAYS = "SYNC_JOB_MAX_TIMEOUT_DAYS"; private static final String CONNECTOR_SPECIFIC_RESOURCE_DEFAULTS_ENABLED = "CONNECTOR_SPECIFIC_RESOURCE_DEFAULTS_ENABLED"; - private static final String MINIMUM_WORKSPACE_RETENTION_DAYS = "MINIMUM_WORKSPACE_RETENTION_DAYS"; - private static final String MAXIMUM_WORKSPACE_RETENTION_DAYS = "MAXIMUM_WORKSPACE_RETENTION_DAYS"; - private static final String MAXIMUM_WORKSPACE_SIZE_MB = "MAXIMUM_WORKSPACE_SIZE_MB"; public static final String MAX_SPEC_WORKERS = "MAX_SPEC_WORKERS"; public static final String MAX_CHECK_WORKERS = "MAX_CHECK_WORKERS"; public static final String MAX_DISCOVER_WORKERS = "MAX_DISCOVER_WORKERS"; @@ -81,7 +78,6 @@ public class EnvConfigs implements Configs { private static final String TEMPORAL_WORKER_PORTS = "TEMPORAL_WORKER_PORTS"; private static final String TEMPORAL_HISTORY_RETENTION_IN_DAYS = "TEMPORAL_HISTORY_RETENTION_IN_DAYS"; public static final String JOB_KUBE_NAMESPACE = "JOB_KUBE_NAMESPACE"; - private static final String SUBMITTER_NUM_THREADS = "SUBMITTER_NUM_THREADS"; public static final String JOB_MAIN_CONTAINER_CPU_REQUEST = "JOB_MAIN_CONTAINER_CPU_REQUEST"; public static final String JOB_MAIN_CONTAINER_CPU_LIMIT = "JOB_MAIN_CONTAINER_CPU_LIMIT"; public static final String JOB_MAIN_CONTAINER_MEMORY_REQUEST = "JOB_MAIN_CONTAINER_MEMORY_REQUEST"; @@ -156,9 +152,6 @@ public class EnvConfigs implements Configs { private static final String DEFAULT_JOB_KUBE_SOCAT_IMAGE = "alpine/socat:1.7.4.1-r1"; private static final String DEFAULT_JOB_KUBE_BUSYBOX_IMAGE = "busybox:1.28"; private static final String DEFAULT_JOB_KUBE_CURL_IMAGE = "curlimages/curl:7.77.0"; - private static final long DEFAULT_MINIMUM_WORKSPACE_RETENTION_DAYS = 1; - private static final long DEFAULT_MAXIMUM_WORKSPACE_RETENTION_DAYS = 60; - private static final long DEFAULT_MAXIMUM_WORKSPACE_SIZE_MB = 5000; private static final int DEFAULT_DATABASE_INITIALIZATION_TIMEOUT_MS = 60 * 1000; public static final long DEFAULT_MAX_SPEC_WORKERS = 5; @@ -791,21 +784,6 @@ public Set<Integer> getTemporalWorkerPorts() { return Arrays.stream(ports.split(",")).map(Integer::valueOf).collect(Collectors.toSet()); } - // Scheduler - @Override - public WorkspaceRetentionConfig getWorkspaceRetentionConfig() { - final long minDays = getEnvOrDefault(MINIMUM_WORKSPACE_RETENTION_DAYS, DEFAULT_MINIMUM_WORKSPACE_RETENTION_DAYS); - final long maxDays = getEnvOrDefault(MAXIMUM_WORKSPACE_RETENTION_DAYS, DEFAULT_MAXIMUM_WORKSPACE_RETENTION_DAYS); - final long maxSizeMb = getEnvOrDefault(MAXIMUM_WORKSPACE_SIZE_MB, DEFAULT_MAXIMUM_WORKSPACE_SIZE_MB); - - return new WorkspaceRetentionConfig(minDays, maxDays, maxSizeMb); - } - - @Override - public String getSubmitterNumThreads() { - return getEnvOrDefault(SUBMITTER_NUM_THREADS, "5"); - } - @Override public boolean getContainerOrchestratorEnabled() { return getEnvOrDefault(CONTAINER_ORCHESTRATOR_ENABLED, false, Boolean::valueOf); diff --git a/charts/airbyte/templates/env-configmap.yaml b/charts/airbyte/templates/env-configmap.yaml index
831907a90d2e..28f3be4a830f 100644 --- a/charts/airbyte/templates/env-configmap.yaml +++ b/charts/airbyte/templates/env-configmap.yaml @@ -43,7 +43,6 @@ data: S3_PATH_STYLE_ACCESS: {{ include "airbyte.s3PathStyleAccess" . | quote }} STATE_STORAGE_MINIO_BUCKET_NAME: airbyte-state-storage STATE_STORAGE_MINIO_ENDPOINT: {{ include "airbyte.minio.endpoint" . | quote }} - SUBMITTER_NUM_THREADS: "10" TEMPORAL_HOST: {{ include "common.names.fullname" . }}-temporal:{{ .Values.temporal.service.port }} TEMPORAL_WORKER_PORTS: 9001,9002,9003,9004,9005,9006,9007,9008,9009,9010,9011,9012,9013,9014,9015,9016,9017,9018,9019,9020,9021,9022,9023,9024,9025,9026,9027,9028,9029,9030,9031,9032,9033,9034,9035,9036,9037,9038,9039,9040 TRACKING_STRATEGY: segment diff --git a/charts/airbyte/templates/worker/deployment.yaml b/charts/airbyte/templates/worker/deployment.yaml index 8fd10be5f82a..e14aa771184f 100644 --- a/charts/airbyte/templates/worker/deployment.yaml +++ b/charts/airbyte/templates/worker/deployment.yaml @@ -133,11 +133,6 @@ spec: name: airbyte-env key: JOB_KUBE_TOLERATIONS {{- end }} - - name: SUBMITTER_NUM_THREADS - valueFrom: - configMapKeyRef: - name: {{ include "common.names.fullname" . }}-env - key: SUBMITTER_NUM_THREADS - name: JOB_MAIN_CONTAINER_CPU_REQUEST valueFrom: configMapKeyRef: diff --git a/docs/deploying-airbyte/on-kubernetes.md b/docs/deploying-airbyte/on-kubernetes.md index 08a7ad2e703a..bb08328ba40c 100644 --- a/docs/deploying-airbyte/on-kubernetes.md +++ b/docs/deploying-airbyte/on-kubernetes.md @@ -159,7 +159,7 @@ Now visit [http://localhost:8000](http://localhost:8000) in your browser and sta ### Increasing job parallelism -The number of simultaneous jobs \(getting specs, checking connections, discovering schemas, and performing syncs\) is limited by a few factors. First of all, the `SUBMITTER_NUM_THREADS` \(set in the `.env` file for your Kustimization overlay\) provides a global limit on the number of simultaneous jobs that can run across all worker pods. +The number of simultaneous jobs \(getting specs, checking connections, discovering schemas, and performing syncs\) is limited by a few factors. First of all, jobs are picked up and executed by airbyte-worker pods, so increasing the number of workers will allow more jobs to be processed in parallel. The number of worker pods can be changed by increasing the number of replicas for the `airbyte-worker` deployment. An example of a Kustomization patch that increases this number can be seen in `airbyte/kube/overlays/dev-integration-test/kustomization.yaml` and `airbyte/kube/overlays/dev-integration-test/parallelize-worker.yaml`. The number of simultaneous jobs on a specific worker pod is also limited by the number of ports exposed by the worker deployment and set by `TEMPORAL_WORKER_PORTS` in your `.env` file. Without additional ports used to communicate to connector pods, jobs will start to run but will hang until ports become available. diff --git a/docs/operator-guides/configuring-airbyte.md b/docs/operator-guides/configuring-airbyte.md index 894b9cbe6123..3bae0cd9fb9b 100644 --- a/docs/operator-guides/configuring-airbyte.md +++ b/docs/operator-guides/configuring-airbyte.md @@ -80,13 +80,7 @@ The following variables are relevant to both Docker and Kubernetes. 2. `MAX_CHECK_WORKERS` - Define the maximum number of Check workers each Airbyte Worker container can support. Defaults to 5. 3. `MAX_SYNC_WORKERS` - Define the maximum number of Sync workers each Airbyte Worker container can support. Defaults to 5. 4. 
`MAX_DISCOVER_WORKERS` - Define the maximum number of Discover workers each Airbyte Worker container can support. Defaults to 5. -5. `SENTRY_DSN` - Define the [DSN](https://docs.sentry.io/product/sentry-basics/dsn-explainer/) of necessary Sentry instance. Defaults to empty. Integration with Sentry is explained [here](./sentry-integration.md) - -#### Scheduler -1. `SUBMITTER_NUM_THREADS` - Define the maximum number of concurrent jobs the Scheduler schedules. Defaults to 5. -2. `MINIMUM_WORKSPACE_RETENTION_DAYS` - Defines the minimum configuration file age for sweeping. The Scheduler will do it's best to now sweep files younger than this. Defaults to 1 day. -3. `MAXIMUM_WORKSPACE_RETENTION_DAYS` - Defines the oldest un-swept configuration file age. Files older than this will definitely be swept. Defaults to 60 days. -4. `MAXIMUM_WORKSPACE_SIZE_MB` - Defines the workspace size sweeping will continue until. Defaults to 5GB. +5. `SENTRY_DSN` - Define the [DSN](https://docs.sentry.io/product/sentry-basics/dsn-explainer/) of necessary Sentry instance. Defaults to empty. Integration with Sentry is explained [here](./sentry-integration.md) ### Docker-Only 1. `WORKSPACE_DOCKER_MOUNT` - Defines the name of the Airbyte docker volume. diff --git a/kube/overlays/dev-integration-test/.env b/kube/overlays/dev-integration-test/.env index d937f08653d2..409a698dde09 100644 --- a/kube/overlays/dev-integration-test/.env +++ b/kube/overlays/dev-integration-test/.env @@ -24,9 +24,6 @@ WORKSPACE_DOCKER_MOUNT=airbyte_workspace LOCAL_ROOT=/tmp/airbyte_local -# Maximum total simultaneous jobs across all worker nodes -SUBMITTER_NUM_THREADS=10 - # Miscellaneous TRACKING_STRATEGY=logging WEBAPP_URL=airbyte-webapp-svc:80 diff --git a/kube/overlays/dev/.env b/kube/overlays/dev/.env index 5c0caf4aa478..7d885650df1a 100644 --- a/kube/overlays/dev/.env +++ b/kube/overlays/dev/.env @@ -26,9 +26,6 @@ WORKSPACE_DOCKER_MOUNT=airbyte_workspace LOCAL_ROOT=/tmp/airbyte_local -# Maximum total simultaneous jobs across all worker nodes -SUBMITTER_NUM_THREADS=10 - # Miscellaneous TRACKING_STRATEGY=logging WEBAPP_URL=airbyte-webapp-svc:80 diff --git a/kube/overlays/stable-with-resource-limits/.env b/kube/overlays/stable-with-resource-limits/.env index f94f29011b5f..f5f25ebe1d1d 100644 --- a/kube/overlays/stable-with-resource-limits/.env +++ b/kube/overlays/stable-with-resource-limits/.env @@ -26,9 +26,6 @@ WORKSPACE_DOCKER_MOUNT=airbyte_workspace LOCAL_ROOT=/tmp/airbyte_local -# Maximum total simultaneous jobs across all worker nodes -SUBMITTER_NUM_THREADS=10 - # Miscellaneous TRACKING_STRATEGY=segment WEBAPP_URL=airbyte-webapp-svc:80 diff --git a/kube/overlays/stable/.env b/kube/overlays/stable/.env index e0f1a557f8f7..ff53707da411 100644 --- a/kube/overlays/stable/.env +++ b/kube/overlays/stable/.env @@ -26,9 +26,6 @@ WORKSPACE_DOCKER_MOUNT=airbyte_workspace LOCAL_ROOT=/tmp/airbyte_local -# Maximum total simultaneous jobs across all worker nodes -SUBMITTER_NUM_THREADS=10 - # Miscellaneous TRACKING_STRATEGY=segment WEBAPP_URL=airbyte-webapp-svc:80 diff --git a/kube/resources/worker.yaml b/kube/resources/worker.yaml index f0c1deb5565e..532ac994327d 100644 --- a/kube/resources/worker.yaml +++ b/kube/resources/worker.yaml @@ -99,11 +99,6 @@ spec: valueFrom: fieldRef: fieldPath: metadata.namespace - - name: SUBMITTER_NUM_THREADS - valueFrom: - configMapKeyRef: - name: airbyte-env - key: SUBMITTER_NUM_THREADS - name: JOB_MAIN_CONTAINER_CPU_REQUEST valueFrom: configMapKeyRef: From e076bbb5c76f5adf1923bc4bf7760328d48833db Mon Sep 17 00:00:00 
2001 From: lmossman Date: Thu, 2 Jun 2022 17:56:49 -0700 Subject: [PATCH 09/14] format --- .../java/io/airbyte/scheduler/persistence/JobCleaner.java | 8 ++++---- .../io/airbyte/scheduler/persistence/JobCleanerTest.java | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobCleaner.java b/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobCleaner.java index c50aaa464f50..5f54647838eb 100644 --- a/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobCleaner.java +++ b/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobCleaner.java @@ -27,10 +27,10 @@ import org.slf4j.LoggerFactory; /** - * NOTE: This class is currently unused, as of the deletion of the old airbyte-scheduler application. - * This class is being kept around as it may be useful in the future when job sweeping is added to - * the temporal scheduler. Go to the following permalink to see how this class was used by the - * SchedulerApp before that was removed: + * NOTE: This class is currently unused, as of the deletion of the old airbyte-scheduler + * application. This class is being kept around as it may be useful in the future when job sweeping + * is added to the temporal scheduler. Go to the following permalink to see how this class was used + * by the SchedulerApp before that was removed: * https://github.com/airbytehq/airbyte/blob/88390f24ea0490d979dc9a96623539279a2d0eb0/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java#L270 */ diff --git a/airbyte-scheduler/persistence/src/test/java/io/airbyte/scheduler/persistence/JobCleanerTest.java b/airbyte-scheduler/persistence/src/test/java/io/airbyte/scheduler/persistence/JobCleanerTest.java index 5b582f62083c..5b9688ee3d94 100644 --- a/airbyte-scheduler/persistence/src/test/java/io/airbyte/scheduler/persistence/JobCleanerTest.java +++ b/airbyte-scheduler/persistence/src/test/java/io/airbyte/scheduler/persistence/JobCleanerTest.java @@ -12,7 +12,6 @@ import io.airbyte.config.WorkspaceRetentionConfig; import io.airbyte.scheduler.models.Job; import io.airbyte.scheduler.models.JobStatus; -import io.airbyte.scheduler.persistence.JobPersistence; import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; From 323698a29adce42f484d5e4ace8fd495cfe46a25 Mon Sep 17 00:00:00 2001 From: lmossman Date: Thu, 2 Jun 2022 18:09:22 -0700 Subject: [PATCH 10/14] remove feature flags from web backend connection handler as it is no longer needed --- .../main/java/io/airbyte/server/apis/ConfigurationApi.java | 1 - .../airbyte/server/handlers/WebBackendConnectionsHandler.java | 1 - .../server/handlers/WebBackendConnectionsHandlerTest.java | 4 ---- 3 files changed, 6 deletions(-) diff --git a/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java b/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java index da634bd9e41d..5d1f058183eb 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java +++ b/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java @@ -229,7 +229,6 @@ public ConfigurationApi(final ConfigRepository configRepository, jobHistoryHandler, schedulerHandler, operationsHandler, - featureFlags, eventRunner, configRepository); healthCheckHandler = new HealthCheckHandler(configRepository); diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/WebBackendConnectionsHandler.java 
b/airbyte-server/src/main/java/io/airbyte/server/handlers/WebBackendConnectionsHandler.java index 0ef9a8916d3e..f1ba7d23950b 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/WebBackendConnectionsHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/WebBackendConnectionsHandler.java @@ -74,7 +74,6 @@ public class WebBackendConnectionsHandler { private final JobHistoryHandler jobHistoryHandler; private final SchedulerHandler schedulerHandler; private final OperationsHandler operationsHandler; - private final FeatureFlags featureFlags; private final EventRunner eventRunner; private final ConfigRepository configRepository; diff --git a/airbyte-server/src/test/java/io/airbyte/server/handlers/WebBackendConnectionsHandlerTest.java b/airbyte-server/src/test/java/io/airbyte/server/handlers/WebBackendConnectionsHandlerTest.java index 4be812e6753f..da293306d38e 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/handlers/WebBackendConnectionsHandlerTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/handlers/WebBackendConnectionsHandlerTest.java @@ -60,7 +60,6 @@ import io.airbyte.api.model.generated.WebBackendWorkspaceState; import io.airbyte.api.model.generated.WorkspaceIdRequestBody; import io.airbyte.commons.enums.Enums; -import io.airbyte.commons.features.FeatureFlags; import io.airbyte.config.DestinationConnection; import io.airbyte.config.SourceConnection; import io.airbyte.config.StandardDestinationDefinition; @@ -104,7 +103,6 @@ class WebBackendConnectionsHandlerTest { private OperationReadList operationReadList; private WebBackendConnectionRead expected; private WebBackendConnectionRead expectedWithNewSchema; - private FeatureFlags featureFlags; private EventRunner eventRunner; private ConnectionHelper connectionHelper; private ConfigRepository configRepository; @@ -118,7 +116,6 @@ public void setup() throws IOException, JsonValidationException, ConfigNotFoundE final JobHistoryHandler jobHistoryHandler = mock(JobHistoryHandler.class); configRepository = mock(ConfigRepository.class); schedulerHandler = mock(SchedulerHandler.class); - featureFlags = mock(FeatureFlags.class); eventRunner = mock(EventRunner.class); connectionHelper = mock(ConnectionHelper.class); wbHandler = new WebBackendConnectionsHandler(connectionsHandler, @@ -127,7 +124,6 @@ public void setup() throws IOException, JsonValidationException, ConfigNotFoundE jobHistoryHandler, schedulerHandler, operationsHandler, - featureFlags, eventRunner, configRepository); From 28819e3172e6d71088709abadaa7b43c64008ca0 Mon Sep 17 00:00:00 2001 From: lmossman Date: Thu, 2 Jun 2022 18:22:18 -0700 Subject: [PATCH 11/14] remove feature flags from config api as it is no longer needed --- .../main/java/io/airbyte/server/ConfigurationApiFactory.java | 5 ----- .../src/main/java/io/airbyte/server/ServerApp.java | 1 - .../src/main/java/io/airbyte/server/ServerFactory.java | 4 ---- .../main/java/io/airbyte/server/apis/ConfigurationApi.java | 2 -- .../server/handlers/WebBackendConnectionsHandler.java | 1 - 5 files changed, 13 deletions(-) diff --git a/airbyte-server/src/main/java/io/airbyte/server/ConfigurationApiFactory.java b/airbyte-server/src/main/java/io/airbyte/server/ConfigurationApiFactory.java index 091ae398ed2f..0c3c192783c6 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/ConfigurationApiFactory.java +++ b/airbyte-server/src/main/java/io/airbyte/server/ConfigurationApiFactory.java @@ -5,7 +5,6 @@ package io.airbyte.server; import io.airbyte.analytics.TrackingClient;
-import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.io.FileTtlManager; import io.airbyte.commons.version.AirbyteVersion; import io.airbyte.config.Configs.WorkerEnvironment; @@ -44,7 +43,6 @@ public class ConfigurationApiFactory implements Factory<ConfigurationApi> { private static Path workspaceRoot; private static AirbyteVersion airbyteVersion; private static HttpClient httpClient; - private static FeatureFlags featureFlags; private static EventRunner eventRunner; private static Flyway configsFlyway; private static Flyway jobsFlyway; @@ -66,7 +64,6 @@ public static void setValues( final AirbyteVersion airbyteVersion, final Path workspaceRoot, final HttpClient httpClient, - final FeatureFlags featureFlags, final EventRunner eventRunner, final Flyway configsFlyway, final Flyway jobsFlyway) { @@ -86,7 +83,6 @@ public static void setValues( ConfigurationApiFactory.workspaceRoot = workspaceRoot; ConfigurationApiFactory.airbyteVersion = airbyteVersion; ConfigurationApiFactory.httpClient = httpClient; - ConfigurationApiFactory.featureFlags = featureFlags; ConfigurationApiFactory.eventRunner = eventRunner; ConfigurationApiFactory.configsFlyway = configsFlyway; ConfigurationApiFactory.jobsFlyway = jobsFlyway; @@ -112,7 +108,6 @@ public ConfigurationApi provide() { ConfigurationApiFactory.airbyteVersion, ConfigurationApiFactory.workspaceRoot, ConfigurationApiFactory.httpClient, - ConfigurationApiFactory.featureFlags, ConfigurationApiFactory.eventRunner, ConfigurationApiFactory.configsFlyway, ConfigurationApiFactory.jobsFlyway); diff --git a/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java b/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java index 1f5bdb2b97fe..59192a2b92c7 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java +++ b/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java @@ -226,7 +226,6 @@ public static ServerRunnable getServer(final ServerFactory apiFactory, configs.getAirbyteVersion(), configs.getWorkspaceRoot(), httpClient, - featureFlags, eventRunner, configsFlyway, jobsFlyway); diff --git a/airbyte-server/src/main/java/io/airbyte/server/ServerFactory.java b/airbyte-server/src/main/java/io/airbyte/server/ServerFactory.java index 3221b565dbb5..136b1c956409 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/ServerFactory.java +++ b/airbyte-server/src/main/java/io/airbyte/server/ServerFactory.java @@ -5,7 +5,6 @@ package io.airbyte.server; import io.airbyte.analytics.TrackingClient; -import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.io.FileTtlManager; import io.airbyte.commons.version.AirbyteVersion; import io.airbyte.config.Configs.WorkerEnvironment; @@ -42,7 +41,6 @@ ServerRunnable create(SynchronousSchedulerClient cachingSchedulerClient, AirbyteVersion airbyteVersion, Path workspaceRoot, HttpClient httpClient, - FeatureFlags featureFlags, EventRunner eventRunner, Flyway configsFlyway, Flyway jobsFlyway); @@ -64,7 +62,6 @@ public ServerRunnable create(final SynchronousSchedulerClient synchronousSchedul final AirbyteVersion airbyteVersion, final Path workspaceRoot, final HttpClient httpClient, - final FeatureFlags featureFlags, final EventRunner eventRunner, final Flyway configsFlyway, final Flyway jobsFlyway) { @@ -86,7 +83,6 @@ public ServerRunnable create(final SynchronousSchedulerClient synchronousSchedul airbyteVersion, workspaceRoot, httpClient, - featureFlags, eventRunner, configsFlyway, jobsFlyway); diff --git
a/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java b/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java index 5d1f058183eb..0da999eed61c 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java +++ b/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java @@ -96,7 +96,6 @@ import io.airbyte.api.model.generated.WorkspaceReadList; import io.airbyte.api.model.generated.WorkspaceUpdate; import io.airbyte.api.model.generated.WorkspaceUpdateName; -import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.io.FileTtlManager; import io.airbyte.commons.version.AirbyteVersion; import io.airbyte.config.Configs.WorkerEnvironment; @@ -176,7 +175,6 @@ public ConfigurationApi(final ConfigRepository configRepository, final AirbyteVersion airbyteVersion, final Path workspaceRoot, final HttpClient httpClient, - final FeatureFlags featureFlags, final EventRunner eventRunner, final Flyway configsFlyway, final Flyway jobsFlyway) { diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/WebBackendConnectionsHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/WebBackendConnectionsHandler.java index f1ba7d23950b..c97b717e17fb 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/WebBackendConnectionsHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/WebBackendConnectionsHandler.java @@ -43,7 +43,6 @@ import io.airbyte.api.model.generated.WebBackendWorkspaceState; import io.airbyte.api.model.generated.WebBackendWorkspaceStateResult; import io.airbyte.api.model.generated.WorkspaceIdRequestBody; -import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.lang.MoreBooleans; import io.airbyte.config.persistence.ConfigNotFoundException; From 8498cbee5c152cbf0fa8c024d2f0b271d349418e Mon Sep 17 00:00:00 2001 From: lmossman Date: Thu, 2 Jun 2022 18:56:08 -0700 Subject: [PATCH 12/14] remove feature flags input from config api test --- .../test/java/io/airbyte/server/apis/ConfigurationApiTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/airbyte-server/src/test/java/io/airbyte/server/apis/ConfigurationApiTest.java b/airbyte-server/src/test/java/io/airbyte/server/apis/ConfigurationApiTest.java index ba9a62928299..a2c05b7f59e6 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/apis/ConfigurationApiTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/apis/ConfigurationApiTest.java @@ -52,7 +52,6 @@ void testImportDefinitions() { new AirbyteVersion("0.1.0-alpha"), Path.of(""), mock(HttpClient.class), - mock(FeatureFlags.class), mock(EventRunner.class), mock(Flyway.class), mock(Flyway.class)); From f494f6c23cb407de61187ce6d959e6e0b8f34811 Mon Sep 17 00:00:00 2001 From: lmossman Date: Thu, 2 Jun 2022 19:32:41 -0700 Subject: [PATCH 13/14] format + shorter url --- .../main/java/io/airbyte/scheduler/persistence/JobCleaner.java | 2 +- .../test/java/io/airbyte/server/apis/ConfigurationApiTest.java | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobCleaner.java b/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobCleaner.java index 5f54647838eb..b8a17d18ef92 100644 --- a/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobCleaner.java +++ 
b/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobCleaner.java @@ -31,7 +31,7 @@ * application. This class is being kept around as it may be useful in the future when job sweeping * is added to the temporal scheduler. Go to the following permalink to see how this class was used * by the SchedulerApp before that was removed: - * https://github.com/airbytehq/airbyte/blob/88390f24ea0490d979dc9a96623539279a2d0eb0/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java#L270 + * https://github.com/airbytehq/airbyte/blob/v0.39.8-alpha/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java#L270 */ /** diff --git a/airbyte-server/src/test/java/io/airbyte/server/apis/ConfigurationApiTest.java b/airbyte-server/src/test/java/io/airbyte/server/apis/ConfigurationApiTest.java index a2c05b7f59e6..efcdde44f6c2 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/apis/ConfigurationApiTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/apis/ConfigurationApiTest.java @@ -9,7 +9,6 @@ import static org.mockito.Mockito.when; import io.airbyte.analytics.TrackingClient; -import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.io.FileTtlManager; import io.airbyte.commons.version.AirbyteVersion; import io.airbyte.config.Configs; From 2f8c33c3355f25d97d7564f48c273f8788f81cf8 Mon Sep 17 00:00:00 2001 From: lmossman Date: Fri, 3 Jun 2022 14:51:25 -0700 Subject: [PATCH 14/14] remove scheduler parameters from helm chart readme --- charts/airbyte/README.md | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/charts/airbyte/README.md b/charts/airbyte/README.md index a6e93cc938c2..121b28c17dfb 100644 --- a/charts/airbyte/README.md +++ b/charts/airbyte/README.md @@ -66,39 +66,6 @@ Helm charts for Airbyte. | `webapp.extraVolumes` | Additional volumes for webapp pod(s). | `[]` | -### Scheduler Parameters - -| Name | Description | Value | -| ---------------------------------------------- | ------------------------------------------------------------------- | ------------------- | -| `scheduler.replicaCount` | Number of scheduler replicas | `1` | -| `scheduler.image.repository` | The repository to use for the airbyte scheduler image. | `airbyte/scheduler` | -| `scheduler.image.pullPolicy` | the pull policy to use for the airbyte scheduler image | `IfNotPresent` | -| `scheduler.image.tag` | The airbyte scheduler image tag. 
Defaults to the chart's AppVersion | `0.39.9-alpha` | -| `scheduler.podAnnotations` | Add extra annotations to the scheduler pod | `{}` | -| `scheduler.containerSecurityContext` | Security context for the container | `{}` | -| `scheduler.livenessProbe.enabled` | Enable livenessProbe on the scheduler | `true` | -| `scheduler.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `5` | -| `scheduler.livenessProbe.periodSeconds` | Period seconds for livenessProbe | `30` | -| `scheduler.livenessProbe.timeoutSeconds` | Timeout seconds for livenessProbe | `1` | -| `scheduler.livenessProbe.failureThreshold` | Failure threshold for livenessProbe | `3` | -| `scheduler.livenessProbe.successThreshold` | Success threshold for livenessProbe | `1` | -| `scheduler.readinessProbe.enabled` | Enable readinessProbe on the scheduler | `true` | -| `scheduler.readinessProbe.initialDelaySeconds` | Initial delay seconds for readinessProbe | `5` | -| `scheduler.readinessProbe.periodSeconds` | Period seconds for readinessProbe | `30` | -| `scheduler.readinessProbe.timeoutSeconds` | Timeout seconds for readinessProbe | `1` | -| `scheduler.readinessProbe.failureThreshold` | Failure threshold for readinessProbe | `3` | -| `scheduler.readinessProbe.successThreshold` | Success threshold for readinessProbe | `1` | -| `scheduler.resources.limits` | The resources limits for the scheduler container | `{}` | -| `scheduler.resources.requests` | The requested resources for the scheduler container | `{}` | -| `scheduler.nodeSelector` | Node labels for pod assignment | `{}` | -| `scheduler.tolerations` | Tolerations for scheduler pod assignment. | `[]` | -| `scheduler.affinity` | Affinity and anti-affinity for scheduler pod assignment. | `{}` | -| `scheduler.log.level` | The log level to log at. | `INFO` | -| `scheduler.extraEnv` | Additional env vars for scheduler pod(s). | `[]` | -| `scheduler.extraVolumeMounts` | Additional volumeMounts for scheduler container(s). | `[]` | -| `scheduler.extraVolumes` | Additional volumes for scheduler pod(s). | `[]` | - - ### Pod Sweeper parameters | Name | Description | Value |