diff --git a/airbyte-commons-worker/src/main/java/io/airbyte/workers/general/DefaultReplicationWorker.java b/airbyte-commons-worker/src/main/java/io/airbyte/workers/general/DefaultReplicationWorker.java index 36f05be67751..6e948dd92e5f 100644 --- a/airbyte-commons-worker/src/main/java/io/airbyte/workers/general/DefaultReplicationWorker.java +++ b/airbyte-commons-worker/src/main/java/io/airbyte/workers/general/DefaultReplicationWorker.java @@ -38,7 +38,7 @@ import io.airbyte.workers.internal.AirbyteDestination; import io.airbyte.workers.internal.AirbyteMapper; import io.airbyte.workers.internal.AirbyteSource; -import io.airbyte.workers.internal.MessageTracker; +import io.airbyte.workers.internal.book_keeping.MessageTracker; import java.nio.file.Path; import java.util.ArrayList; import java.util.HashMap; diff --git a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/AirbyteMessageTracker.java b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/book_keeping/AirbyteMessageTracker.java similarity index 91% rename from airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/AirbyteMessageTracker.java rename to airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/book_keeping/AirbyteMessageTracker.java index a94c00927431..aa0fa8ec70a8 100644 --- a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/AirbyteMessageTracker.java +++ b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/book_keeping/AirbyteMessageTracker.java @@ -2,7 +2,7 @@ * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
*/ -package io.airbyte.workers.internal; +package io.airbyte.workers.internal.book_keeping; import static io.airbyte.metrics.lib.ApmTraceConstants.WORKER_OPERATION_NAME; @@ -28,7 +28,7 @@ import io.airbyte.protocol.models.AirbyteStreamNameNamespacePair; import io.airbyte.protocol.models.AirbyteTraceMessage; import io.airbyte.workers.helper.FailureHelper; -import io.airbyte.workers.internal.StateMetricsTracker.StateMetricsTrackerNoStateMatchException; +import io.airbyte.workers.internal.book_keeping.StateMetricsTracker.StateMetricsTrackerNoStateMatchException; import io.airbyte.workers.internal.state_aggregator.DefaultStateAggregator; import io.airbyte.workers.internal.state_aggregator.StateAggregator; import java.time.LocalDateTime; @@ -42,6 +42,13 @@ import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; +/** + * This class is responsible for stats and metadata tracking surrounding + * {@link AirbyteRecordMessage}. + *
<p>
+ * It is not intended to perform meaningful operations - transforming, mutating, triggering + * downstream actions etc. - on specific messages. + */ @Slf4j public class AirbyteMessageTracker implements MessageTracker { @@ -54,8 +61,6 @@ public class AirbyteMessageTracker implements MessageTracker { private final HashFunction hashFunction; private final BiMap nameNamespacePairToIndex; private final Map nameNamespacePairToStreamStats; - private final Map streamToTotalBytesEmitted; - private final Map streamToTotalRecordsEmitted; private final StateDeltaTracker stateDeltaTracker; private final StateMetricsTracker stateMetricsTracker; private final List destinationErrorTraceMessages; @@ -86,11 +91,6 @@ private enum ConnectorType { DESTINATION } - /** - * POJO for all per-stream stats. - */ - private record StreamStats(long estimatedBytes, long emittedBytes, long estimatedRecords, long emittedRecords) {} - public AirbyteMessageTracker() { this(new StateDeltaTracker(STATE_DELTA_TRACKER_MEMORY_LIMIT_BYTES), new DefaultStateAggregator(new EnvVariableFeatureFlags().useStreamCapableState()), @@ -107,8 +107,6 @@ protected AirbyteMessageTracker(final StateDeltaTracker stateDeltaTracker, this.nameNamespacePairToIndex = HashBiMap.create(); this.hashFunction = Hashing.murmur3_32_fixed(); this.nameNamespacePairToStreamStats = new HashMap<>(); - this.streamToTotalBytesEmitted = new HashMap<>(); - this.streamToTotalRecordsEmitted = new HashMap<>(); this.stateDeltaTracker = stateDeltaTracker; this.stateMetricsTracker = stateMetricsTracker; this.nextStreamIndex = 0; @@ -155,17 +153,19 @@ private void handleSourceEmittedRecord(final AirbyteRecordMessage recordMessage) stateMetricsTracker.setFirstRecordReceivedAt(LocalDateTime.now()); } - final short streamIndex = getStreamIndex(AirbyteStreamNameNamespacePair.fromRecordMessage(recordMessage)); + final var nameNamespace = AirbyteStreamNameNamespacePair.fromRecordMessage(recordMessage); + final short streamIndex = 
getStreamIndex(nameNamespace); final long currentRunningCount = streamToRunningCount.getOrDefault(streamIndex, 0L); streamToRunningCount.put(streamIndex, currentRunningCount + 1); - final long currentTotalCount = streamToTotalRecordsEmitted.getOrDefault(streamIndex, 0L); - streamToTotalRecordsEmitted.put(streamIndex, currentTotalCount + 1); + final var currStats = nameNamespacePairToStreamStats.getOrDefault(nameNamespace, new StreamStats()); + currStats.emittedRecords++; final int estimatedNumBytes = Jsons.getEstimatedByteSize(recordMessage.getData()); - final long currentTotalStreamBytes = streamToTotalBytesEmitted.getOrDefault(streamIndex, 0L); - streamToTotalBytesEmitted.put(streamIndex, currentTotalStreamBytes + estimatedNumBytes); + currStats.emittedBytes += estimatedNumBytes; + + nameNamespacePairToStreamStats.put(nameNamespace, currStats); } /** @@ -296,9 +296,11 @@ private void handleEmittedEstimateTrace(final AirbyteEstimateTraceMessage estima Preconditions.checkArgument(totalRecordsEstimatedSync == null, "STREAM and SYNC estimates should not be emitted in the same sync."); log.debug("Saving stream estimates for namespace: {}, stream: {}", estimate.getNamespace(), estimate.getName()); - nameNamespacePairToStreamStats.put( - new AirbyteStreamNameNamespacePair(estimate.getName(), estimate.getNamespace()), - new StreamStats(estimate.getByteEstimate(), 0L, estimate.getRowEstimate(), 0L)); + final var nameNamespace = new AirbyteStreamNameNamespacePair(estimate.getName(), estimate.getNamespace()); + final var currStats = nameNamespacePairToStreamStats.getOrDefault(nameNamespace, new StreamStats()); + currStats.estimatedRecords = estimate.getRowEstimate(); + currStats.estimatedBytes = estimate.getByteEstimate(); + nameNamespacePairToStreamStats.put(nameNamespace, currStats); } case SYNC -> { Preconditions.checkArgument(nameNamespacePairToStreamStats.isEmpty(), "STREAM and SYNC estimates should not be emitted in the same sync."); @@ -404,8 +406,8 @@ public 
Optional> getStreamToCommittedR */ @Override public Map getStreamToEmittedRecords() { - return streamToTotalRecordsEmitted.entrySet().stream().collect(Collectors.toMap( - entry -> nameNamespacePairToIndex.inverse().get(entry.getKey()), Entry::getValue)); + return nameNamespacePairToStreamStats.entrySet().stream().collect(Collectors.toMap( + Entry::getKey, entry -> entry.getValue().emittedRecords)); } /** @@ -416,7 +418,7 @@ public Map getStreamToEstimatedRecords() { return nameNamespacePairToStreamStats.entrySet().stream().collect( Collectors.toMap( Entry::getKey, - entry -> entry.getValue().estimatedRecords())); + entry -> entry.getValue().estimatedRecords)); } /** @@ -424,8 +426,9 @@ public Map getStreamToEstimatedRecords() { */ @Override public Map getStreamToEmittedBytes() { - return streamToTotalBytesEmitted.entrySet().stream().collect(Collectors.toMap( - entry -> nameNamespacePairToIndex.inverse().get(entry.getKey()), Entry::getValue)); + return nameNamespacePairToStreamStats.entrySet().stream().collect(Collectors.toMap( + Entry::getKey, + entry -> entry.getValue().emittedBytes)); } /** @@ -436,7 +439,7 @@ public Map getStreamToEstimatedBytes() { return nameNamespacePairToStreamStats.entrySet().stream().collect( Collectors.toMap( Entry::getKey, - entry -> entry.getValue().estimatedBytes())); + entry -> entry.getValue().estimatedBytes)); } /** @@ -444,7 +447,9 @@ public Map getStreamToEstimatedBytes() { */ @Override public long getTotalRecordsEmitted() { - return streamToTotalRecordsEmitted.values().stream().reduce(0L, Long::sum); + return nameNamespacePairToStreamStats.values().stream() + .map(stats -> stats.emittedRecords) + .reduce(0L, Long::sum); } /** @@ -466,7 +471,9 @@ public long getTotalRecordsEstimated() { */ @Override public long getTotalBytesEmitted() { - return streamToTotalBytesEmitted.values().stream().reduce(0L, Long::sum); + return nameNamespacePairToStreamStats.values().stream() + .map(e -> e.emittedBytes) + .reduce(0L, Long::sum); } /** diff 
--git a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/MessageTracker.java b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/book_keeping/MessageTracker.java similarity index 98% rename from airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/MessageTracker.java rename to airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/book_keeping/MessageTracker.java index a2f31bf250d8..cffc10f8606c 100644 --- a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/MessageTracker.java +++ b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/book_keeping/MessageTracker.java @@ -2,7 +2,7 @@ * Copyright (c) 2022 Airbyte, Inc., all rights reserved. */ -package io.airbyte.workers.internal; +package io.airbyte.workers.internal.book_keeping; import io.airbyte.config.FailureReason; import io.airbyte.config.State; diff --git a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/StateDeltaTracker.java b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/book_keeping/StateDeltaTracker.java similarity index 99% rename from airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/StateDeltaTracker.java rename to airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/book_keeping/StateDeltaTracker.java index beab1e0b63e1..6133816174fb 100644 --- a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/StateDeltaTracker.java +++ b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/book_keeping/StateDeltaTracker.java @@ -2,7 +2,7 @@ * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
*/ -package io.airbyte.workers.internal; +package io.airbyte.workers.internal.book_keeping; import static io.airbyte.metrics.lib.ApmTraceConstants.WORKER_OPERATION_NAME; diff --git a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/StateMetricsTracker.java b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/book_keeping/StateMetricsTracker.java similarity index 99% rename from airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/StateMetricsTracker.java rename to airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/book_keeping/StateMetricsTracker.java index b77ca9550d1e..24039cd1911d 100644 --- a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/StateMetricsTracker.java +++ b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/book_keeping/StateMetricsTracker.java @@ -2,7 +2,7 @@ * Copyright (c) 2022 Airbyte, Inc., all rights reserved. */ -package io.airbyte.workers.internal; +package io.airbyte.workers.internal.book_keeping; import io.airbyte.protocol.models.AirbyteStateMessage; import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; diff --git a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/book_keeping/StreamStats.java b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/book_keeping/StreamStats.java new file mode 100644 index 000000000000..efdc3b5122f4 --- /dev/null +++ b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/book_keeping/StreamStats.java @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.workers.internal.book_keeping; + +import java.util.Objects; + +/** + * POJO for all per-stream stats. + *
<p>
+ * We are not able to use a {@link Record} since we want non-final fields to accumulate counts. + */ +public class StreamStats { + + public long estimatedRecords; + public long estimatedBytes; + public long emittedRecords; + public long emittedBytes; + + public StreamStats() { + this(0L, 0L, 0L, 0L); + } + + public StreamStats(final long estimatedBytes, final long emittedBytes, final long estimatedRecords, final long emittedRecords) { + this.estimatedRecords = estimatedRecords; + this.estimatedBytes = estimatedBytes; + this.emittedRecords = emittedRecords; + this.emittedBytes = emittedBytes; + } + + @Override + public boolean equals(final Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + final StreamStats that = (StreamStats) o; + return estimatedRecords == that.estimatedRecords && estimatedBytes == that.estimatedBytes && emittedRecords == that.emittedRecords + && emittedBytes == that.emittedBytes; + } + + @Override + public int hashCode() { + return Objects.hash(estimatedRecords, estimatedBytes, emittedRecords, emittedBytes); + } + +} diff --git a/airbyte-commons-worker/src/test/java/io/airbyte/workers/general/DefaultReplicationWorkerTest.java b/airbyte-commons-worker/src/test/java/io/airbyte/workers/general/DefaultReplicationWorkerTest.java index e13f44edb6b4..36cab0784cd1 100644 --- a/airbyte-commons-worker/src/test/java/io/airbyte/workers/general/DefaultReplicationWorkerTest.java +++ b/airbyte-commons-worker/src/test/java/io/airbyte/workers/general/DefaultReplicationWorkerTest.java @@ -50,9 +50,9 @@ import io.airbyte.workers.exception.WorkerException; import io.airbyte.workers.helper.FailureHelper; import io.airbyte.workers.internal.AirbyteDestination; -import io.airbyte.workers.internal.AirbyteMessageTracker; import io.airbyte.workers.internal.AirbyteSource; import io.airbyte.workers.internal.NamespacingMapper; +import io.airbyte.workers.internal.book_keeping.AirbyteMessageTracker; import 
io.airbyte.workers.test_utils.AirbyteMessageUtils; import io.airbyte.workers.test_utils.TestConfigHelpers; import java.io.IOException; diff --git a/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/AirbyteMessageTrackerTest.java b/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/book_keeping/AirbyteMessageTrackerTest.java similarity index 99% rename from airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/AirbyteMessageTrackerTest.java rename to airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/book_keeping/AirbyteMessageTrackerTest.java index aed444225cf9..8d6871ede3d1 100644 --- a/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/AirbyteMessageTrackerTest.java +++ b/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/book_keeping/AirbyteMessageTrackerTest.java @@ -2,7 +2,7 @@ * Copyright (c) 2022 Airbyte, Inc., all rights reserved. */ -package io.airbyte.workers.internal; +package io.airbyte.workers.internal.book_keeping; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNull; @@ -15,7 +15,7 @@ import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteStreamNameNamespacePair; import io.airbyte.workers.helper.FailureHelper; -import io.airbyte.workers.internal.StateDeltaTracker.StateDeltaTrackerException; +import io.airbyte.workers.internal.book_keeping.StateDeltaTracker.StateDeltaTrackerException; import io.airbyte.workers.internal.state_aggregator.StateAggregator; import io.airbyte.workers.test_utils.AirbyteMessageUtils; import java.util.HashMap; diff --git a/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/StateDeltaTrackerTest.java b/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/book_keeping/StateDeltaTrackerTest.java similarity index 97% rename from airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/StateDeltaTrackerTest.java 
rename to airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/book_keeping/StateDeltaTrackerTest.java index 3df44d94a5fd..cee9fda29306 100644 --- a/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/StateDeltaTrackerTest.java +++ b/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/book_keeping/StateDeltaTrackerTest.java @@ -2,9 +2,9 @@ * Copyright (c) 2022 Airbyte, Inc., all rights reserved. */ -package io.airbyte.workers.internal; +package io.airbyte.workers.internal.book_keeping; -import io.airbyte.workers.internal.StateDeltaTracker.StateDeltaTrackerException; +import io.airbyte.workers.internal.book_keeping.StateDeltaTracker.StateDeltaTrackerException; import java.util.Collections; import java.util.HashMap; import java.util.Map; diff --git a/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/StateMetricsTrackerTest.java b/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/book_keeping/StateMetricsTrackerTest.java similarity index 97% rename from airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/StateMetricsTrackerTest.java rename to airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/book_keeping/StateMetricsTrackerTest.java index 8c2759e83651..15456f853f35 100644 --- a/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/StateMetricsTrackerTest.java +++ b/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/book_keeping/StateMetricsTrackerTest.java @@ -2,15 +2,15 @@ * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
*/ -package io.airbyte.workers.internal; +package io.airbyte.workers.internal.book_keeping; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteStateMessage; -import io.airbyte.workers.internal.StateMetricsTracker.StateMetricsTrackerNoStateMatchException; -import io.airbyte.workers.internal.StateMetricsTracker.StateMetricsTrackerOomException; +import io.airbyte.workers.internal.book_keeping.StateMetricsTracker.StateMetricsTrackerNoStateMatchException; +import io.airbyte.workers.internal.book_keeping.StateMetricsTracker.StateMetricsTrackerOomException; import io.airbyte.workers.test_utils.AirbyteMessageUtils; import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; diff --git a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/orchestrator/ReplicationJobOrchestrator.java b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/orchestrator/ReplicationJobOrchestrator.java index ed3b0708e20c..bc04e8232633 100644 --- a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/orchestrator/ReplicationJobOrchestrator.java +++ b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/orchestrator/ReplicationJobOrchestrator.java @@ -30,7 +30,6 @@ import io.airbyte.workers.WorkerMetricReporter; import io.airbyte.workers.WorkerUtils; import io.airbyte.workers.general.DefaultReplicationWorker; -import io.airbyte.workers.internal.AirbyteMessageTracker; import io.airbyte.workers.internal.AirbyteStreamFactory; import io.airbyte.workers.internal.DefaultAirbyteDestination; import io.airbyte.workers.internal.DefaultAirbyteSource; @@ -39,6 +38,7 @@ import io.airbyte.workers.internal.NamespacingMapper; import io.airbyte.workers.internal.VersionedAirbyteMessageBufferedWriterFactory; import 
io.airbyte.workers.internal.VersionedAirbyteStreamFactory; +import io.airbyte.workers.internal.book_keeping.AirbyteMessageTracker; import io.airbyte.workers.process.AirbyteIntegrationLauncher; import io.airbyte.workers.process.KubePodProcess; import io.airbyte.workers.process.ProcessFactory; diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/AttemptHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/AttemptHandler.java index 83f86861d6b3..7c7470218818 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/AttemptHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/AttemptHandler.java @@ -21,8 +21,7 @@ public AttemptHandler(JobPersistence jobPersistence) { this.jobPersistence = jobPersistence; } - public InternalOperationResult setWorkflowInAttempt( - SetWorkflowInAttemptRequestBody requestBody) { + public InternalOperationResult setWorkflowInAttempt(SetWorkflowInAttemptRequestBody requestBody) { try { jobPersistence.setAttemptTemporalWorkflowInfo(requestBody.getJobId(), requestBody.getAttemptNumber(), requestBody.getWorkflowId().toString(), requestBody.getProcessingTaskQueue()); diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/ReplicationActivityImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/ReplicationActivityImpl.java index e7588ce8f82c..389e182ed322 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/ReplicationActivityImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/ReplicationActivityImpl.java @@ -52,7 +52,6 @@ import io.airbyte.workers.WorkerMetricReporter; import io.airbyte.workers.WorkerUtils; import io.airbyte.workers.general.DefaultReplicationWorker; -import io.airbyte.workers.internal.AirbyteMessageTracker; import io.airbyte.workers.internal.AirbyteSource; import io.airbyte.workers.internal.DefaultAirbyteDestination; import io.airbyte.workers.internal.DefaultAirbyteSource; @@ 
-60,6 +59,7 @@ import io.airbyte.workers.internal.NamespacingMapper; import io.airbyte.workers.internal.VersionedAirbyteMessageBufferedWriterFactory; import io.airbyte.workers.internal.VersionedAirbyteStreamFactory; +import io.airbyte.workers.internal.book_keeping.AirbyteMessageTracker; import io.airbyte.workers.process.AirbyteIntegrationLauncher; import io.airbyte.workers.process.IntegrationLauncher; import io.airbyte.workers.process.ProcessFactory;