Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Further simplify metrics tracker. #19988

Merged
merged 3 commits into from
Dec 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
import io.airbyte.workers.internal.AirbyteDestination;
import io.airbyte.workers.internal.AirbyteMapper;
import io.airbyte.workers.internal.AirbyteSource;
import io.airbyte.workers.internal.MessageTracker;
import io.airbyte.workers.internal.book_keeping.MessageTracker;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright (c) 2022 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.workers.internal;
package io.airbyte.workers.internal.book_keeping;

import static io.airbyte.metrics.lib.ApmTraceConstants.WORKER_OPERATION_NAME;

Expand All @@ -28,7 +28,7 @@
import io.airbyte.protocol.models.AirbyteStreamNameNamespacePair;
import io.airbyte.protocol.models.AirbyteTraceMessage;
import io.airbyte.workers.helper.FailureHelper;
import io.airbyte.workers.internal.StateMetricsTracker.StateMetricsTrackerNoStateMatchException;
import io.airbyte.workers.internal.book_keeping.StateMetricsTracker.StateMetricsTrackerNoStateMatchException;
import io.airbyte.workers.internal.state_aggregator.DefaultStateAggregator;
import io.airbyte.workers.internal.state_aggregator.StateAggregator;
import java.time.LocalDateTime;
Expand All @@ -42,6 +42,13 @@
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;

/**
* This class is responsible for stats and metadata tracking surrounding
* {@link AirbyteRecordMessage}.
* <p>
* It is not intended to perform meaningful operations - transforming, mutating, triggering
* downstream actions etc. - on specific messages.
*/
@Slf4j
public class AirbyteMessageTracker implements MessageTracker {

Expand All @@ -54,8 +61,6 @@ public class AirbyteMessageTracker implements MessageTracker {
private final HashFunction hashFunction;
private final BiMap<AirbyteStreamNameNamespacePair, Short> nameNamespacePairToIndex;
private final Map<AirbyteStreamNameNamespacePair, StreamStats> nameNamespacePairToStreamStats;
private final Map<Short, Long> streamToTotalBytesEmitted;
private final Map<Short, Long> streamToTotalRecordsEmitted;
private final StateDeltaTracker stateDeltaTracker;
private final StateMetricsTracker stateMetricsTracker;
private final List<AirbyteTraceMessage> destinationErrorTraceMessages;
Expand Down Expand Up @@ -86,11 +91,6 @@ private enum ConnectorType {
DESTINATION
}

/**
* POJO for all per-stream stats.
*/
private record StreamStats(long estimatedBytes, long emittedBytes, long estimatedRecords, long emittedRecords) {}

public AirbyteMessageTracker() {
this(new StateDeltaTracker(STATE_DELTA_TRACKER_MEMORY_LIMIT_BYTES),
new DefaultStateAggregator(new EnvVariableFeatureFlags().useStreamCapableState()),
Expand All @@ -107,8 +107,6 @@ protected AirbyteMessageTracker(final StateDeltaTracker stateDeltaTracker,
this.nameNamespacePairToIndex = HashBiMap.create();
this.hashFunction = Hashing.murmur3_32_fixed();
this.nameNamespacePairToStreamStats = new HashMap<>();
this.streamToTotalBytesEmitted = new HashMap<>();
this.streamToTotalRecordsEmitted = new HashMap<>();
this.stateDeltaTracker = stateDeltaTracker;
this.stateMetricsTracker = stateMetricsTracker;
this.nextStreamIndex = 0;
Expand Down Expand Up @@ -155,17 +153,19 @@ private void handleSourceEmittedRecord(final AirbyteRecordMessage recordMessage)
stateMetricsTracker.setFirstRecordReceivedAt(LocalDateTime.now());
}

final short streamIndex = getStreamIndex(AirbyteStreamNameNamespacePair.fromRecordMessage(recordMessage));
final var nameNamespace = AirbyteStreamNameNamespacePair.fromRecordMessage(recordMessage);
final short streamIndex = getStreamIndex(nameNamespace);

final long currentRunningCount = streamToRunningCount.getOrDefault(streamIndex, 0L);
streamToRunningCount.put(streamIndex, currentRunningCount + 1);

final long currentTotalCount = streamToTotalRecordsEmitted.getOrDefault(streamIndex, 0L);
streamToTotalRecordsEmitted.put(streamIndex, currentTotalCount + 1);
final var currStats = nameNamespacePairToStreamStats.getOrDefault(nameNamespace, new StreamStats());
currStats.emittedRecords++;

final int estimatedNumBytes = Jsons.getEstimatedByteSize(recordMessage.getData());
final long currentTotalStreamBytes = streamToTotalBytesEmitted.getOrDefault(streamIndex, 0L);
streamToTotalBytesEmitted.put(streamIndex, currentTotalStreamBytes + estimatedNumBytes);
currStats.emittedBytes += estimatedNumBytes;

nameNamespacePairToStreamStats.put(nameNamespace, currStats);
}

/**
Expand Down Expand Up @@ -296,9 +296,11 @@ private void handleEmittedEstimateTrace(final AirbyteEstimateTraceMessage estima
Preconditions.checkArgument(totalRecordsEstimatedSync == null, "STREAM and SYNC estimates should not be emitted in the same sync.");

log.debug("Saving stream estimates for namespace: {}, stream: {}", estimate.getNamespace(), estimate.getName());
nameNamespacePairToStreamStats.put(
new AirbyteStreamNameNamespacePair(estimate.getName(), estimate.getNamespace()),
new StreamStats(estimate.getByteEstimate(), 0L, estimate.getRowEstimate(), 0L));
final var nameNamespace = new AirbyteStreamNameNamespacePair(estimate.getName(), estimate.getNamespace());
final var currStats = nameNamespacePairToStreamStats.getOrDefault(nameNamespace, new StreamStats());
currStats.estimatedRecords = estimate.getRowEstimate();
currStats.estimatedBytes = estimate.getByteEstimate();
nameNamespacePairToStreamStats.put(nameNamespace, currStats);
}
case SYNC -> {
Preconditions.checkArgument(nameNamespacePairToStreamStats.isEmpty(), "STREAM and SYNC estimates should not be emitted in the same sync.");
Expand Down Expand Up @@ -404,8 +406,8 @@ public Optional<Map<AirbyteStreamNameNamespacePair, Long>> getStreamToCommittedR
*/
@Override
public Map<AirbyteStreamNameNamespacePair, Long> getStreamToEmittedRecords() {
return streamToTotalRecordsEmitted.entrySet().stream().collect(Collectors.toMap(
entry -> nameNamespacePairToIndex.inverse().get(entry.getKey()), Entry::getValue));
return nameNamespacePairToStreamStats.entrySet().stream().collect(Collectors.toMap(
Entry::getKey, entry -> entry.getValue().emittedRecords));
}

/**
Expand All @@ -416,16 +418,17 @@ public Map<AirbyteStreamNameNamespacePair, Long> getStreamToEstimatedRecords() {
return nameNamespacePairToStreamStats.entrySet().stream().collect(
Collectors.toMap(
Entry::getKey,
entry -> entry.getValue().estimatedRecords()));
entry -> entry.getValue().estimatedRecords));
}

/**
* Return total bytes emitted per stream, keyed by stream name/namespace pair.
*/
@Override
public Map<AirbyteStreamNameNamespacePair, Long> getStreamToEmittedBytes() {
return streamToTotalBytesEmitted.entrySet().stream().collect(Collectors.toMap(
entry -> nameNamespacePairToIndex.inverse().get(entry.getKey()), Entry::getValue));
return nameNamespacePairToStreamStats.entrySet().stream().collect(Collectors.toMap(
Entry::getKey,
entry -> entry.getValue().emittedBytes));
}

/**
Expand All @@ -436,15 +439,17 @@ public Map<AirbyteStreamNameNamespacePair, Long> getStreamToEstimatedBytes() {
return nameNamespacePairToStreamStats.entrySet().stream().collect(
Collectors.toMap(
Entry::getKey,
entry -> entry.getValue().estimatedBytes()));
entry -> entry.getValue().estimatedBytes));
}

/**
* Compute sum of emitted record counts across all streams.
*/
@Override
public long getTotalRecordsEmitted() {
return streamToTotalRecordsEmitted.values().stream().reduce(0L, Long::sum);
return nameNamespacePairToStreamStats.values().stream()
.map(stats -> stats.emittedRecords)
.reduce(0L, Long::sum);
}

/**
Expand All @@ -466,7 +471,9 @@ public long getTotalRecordsEstimated() {
*/
@Override
public long getTotalBytesEmitted() {
return streamToTotalBytesEmitted.values().stream().reduce(0L, Long::sum);
return nameNamespacePairToStreamStats.values().stream()
.map(e -> e.emittedBytes)
.reduce(0L, Long::sum);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright (c) 2022 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.workers.internal;
package io.airbyte.workers.internal.book_keeping;

import io.airbyte.config.FailureReason;
import io.airbyte.config.State;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright (c) 2022 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.workers.internal;
package io.airbyte.workers.internal.book_keeping;

import static io.airbyte.metrics.lib.ApmTraceConstants.WORKER_OPERATION_NAME;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright (c) 2022 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.workers.internal;
package io.airbyte.workers.internal.book_keeping;

import io.airbyte.protocol.models.AirbyteStateMessage;
import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* Copyright (c) 2022 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.workers.internal.book_keeping;

import java.util.Objects;

/**
 * Mutable holder for all per-stream stats: estimated and emitted record and byte counts.
 * <p>
 * We are not able to use a {@link Record} since we want non-final fields to accumulate counts.
 * <p>
 * Note that {@link #equals(Object)} and {@link #hashCode()} are computed from the mutable counters,
 * so an instance's hash changes whenever a counter changes. Do not use instances as keys of a
 * hash-based collection while they are still being updated.
 */
public class StreamStats {

  /** Estimated number of records for the stream. */
  public long estimatedRecords;
  /** Estimated number of bytes for the stream. */
  public long estimatedBytes;
  /** Running count of records emitted for the stream. */
  public long emittedRecords;
  /** Running count of bytes emitted for the stream. */
  public long emittedBytes;

  /**
   * Creates stats with every counter initialised to zero.
   */
  public StreamStats() {
    this(0L, 0L, 0L, 0L);
  }

  /**
   * Creates stats with explicit initial values.
   * <p>
   * Beware: the parameter order (both byte counts first, then both record counts) differs from the
   * field declaration order.
   *
   * @param estimatedBytes initial value of {@link #estimatedBytes}
   * @param emittedBytes initial value of {@link #emittedBytes}
   * @param estimatedRecords initial value of {@link #estimatedRecords}
   * @param emittedRecords initial value of {@link #emittedRecords}
   */
  public StreamStats(final long estimatedBytes, final long emittedBytes, final long estimatedRecords, final long emittedRecords) {
    this.estimatedRecords = estimatedRecords;
    this.estimatedBytes = estimatedBytes;
    this.emittedRecords = emittedRecords;
    this.emittedBytes = emittedBytes;
  }

  /**
   * Two instances are equal when they are of the same class and all four counters match.
   *
   * @param o the object to compare against
   * @return {@code true} if {@code o} is a {@code StreamStats} of the same class with identical counters
   */
  @Override
  public boolean equals(final Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    final StreamStats that = (StreamStats) o;
    return estimatedRecords == that.estimatedRecords && estimatedBytes == that.estimatedBytes && emittedRecords == that.emittedRecords
        && emittedBytes == that.emittedBytes;
  }

  /**
   * Hash derived from the current value of all four counters, consistent with
   * {@link #equals(Object)}.
   *
   * @return hash of the current counter values
   */
  @Override
  public int hashCode() {
    return Objects.hash(estimatedRecords, estimatedBytes, emittedRecords, emittedBytes);
  }

  /**
   * Human-readable rendering of all counters, so that log lines and assertion failures show the
   * actual values instead of the default identity-based representation.
   *
   * @return a string listing every counter by name
   */
  @Override
  public String toString() {
    return "StreamStats{"
        + "estimatedRecords=" + estimatedRecords
        + ", estimatedBytes=" + estimatedBytes
        + ", emittedRecords=" + emittedRecords
        + ", emittedBytes=" + emittedBytes
        + '}';
  }

}
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,9 @@
import io.airbyte.workers.exception.WorkerException;
import io.airbyte.workers.helper.FailureHelper;
import io.airbyte.workers.internal.AirbyteDestination;
import io.airbyte.workers.internal.AirbyteMessageTracker;
import io.airbyte.workers.internal.AirbyteSource;
import io.airbyte.workers.internal.NamespacingMapper;
import io.airbyte.workers.internal.book_keeping.AirbyteMessageTracker;
import io.airbyte.workers.test_utils.AirbyteMessageUtils;
import io.airbyte.workers.test_utils.TestConfigHelpers;
import java.io.IOException;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright (c) 2022 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.workers.internal;
package io.airbyte.workers.internal.book_keeping;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
Expand All @@ -15,7 +15,7 @@
import io.airbyte.protocol.models.AirbyteMessage;
import io.airbyte.protocol.models.AirbyteStreamNameNamespacePair;
import io.airbyte.workers.helper.FailureHelper;
import io.airbyte.workers.internal.StateDeltaTracker.StateDeltaTrackerException;
import io.airbyte.workers.internal.book_keeping.StateDeltaTracker.StateDeltaTrackerException;
import io.airbyte.workers.internal.state_aggregator.StateAggregator;
import io.airbyte.workers.test_utils.AirbyteMessageUtils;
import java.util.HashMap;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
* Copyright (c) 2022 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.workers.internal;
package io.airbyte.workers.internal.book_keeping;

import io.airbyte.workers.internal.StateDeltaTracker.StateDeltaTrackerException;
import io.airbyte.workers.internal.book_keeping.StateDeltaTracker.StateDeltaTrackerException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@
* Copyright (c) 2022 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.workers.internal;
package io.airbyte.workers.internal.book_keeping;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;

import io.airbyte.protocol.models.AirbyteMessage;
import io.airbyte.protocol.models.AirbyteStateMessage;
import io.airbyte.workers.internal.StateMetricsTracker.StateMetricsTrackerNoStateMatchException;
import io.airbyte.workers.internal.StateMetricsTracker.StateMetricsTrackerOomException;
import io.airbyte.workers.internal.book_keeping.StateMetricsTracker.StateMetricsTrackerNoStateMatchException;
import io.airbyte.workers.internal.book_keeping.StateMetricsTracker.StateMetricsTrackerOomException;
import io.airbyte.workers.test_utils.AirbyteMessageUtils;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
import io.airbyte.workers.WorkerMetricReporter;
import io.airbyte.workers.WorkerUtils;
import io.airbyte.workers.general.DefaultReplicationWorker;
import io.airbyte.workers.internal.AirbyteMessageTracker;
import io.airbyte.workers.internal.AirbyteStreamFactory;
import io.airbyte.workers.internal.DefaultAirbyteDestination;
import io.airbyte.workers.internal.DefaultAirbyteSource;
Expand All @@ -39,6 +38,7 @@
import io.airbyte.workers.internal.NamespacingMapper;
import io.airbyte.workers.internal.VersionedAirbyteMessageBufferedWriterFactory;
import io.airbyte.workers.internal.VersionedAirbyteStreamFactory;
import io.airbyte.workers.internal.book_keeping.AirbyteMessageTracker;
import io.airbyte.workers.process.AirbyteIntegrationLauncher;
import io.airbyte.workers.process.KubePodProcess;
import io.airbyte.workers.process.ProcessFactory;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@ public AttemptHandler(JobPersistence jobPersistence) {
this.jobPersistence = jobPersistence;
}

public InternalOperationResult setWorkflowInAttempt(
SetWorkflowInAttemptRequestBody requestBody) {
public InternalOperationResult setWorkflowInAttempt(SetWorkflowInAttemptRequestBody requestBody) {
try {
jobPersistence.setAttemptTemporalWorkflowInfo(requestBody.getJobId(),
requestBody.getAttemptNumber(), requestBody.getWorkflowId().toString(), requestBody.getProcessingTaskQueue());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,14 @@
import io.airbyte.workers.WorkerMetricReporter;
import io.airbyte.workers.WorkerUtils;
import io.airbyte.workers.general.DefaultReplicationWorker;
import io.airbyte.workers.internal.AirbyteMessageTracker;
import io.airbyte.workers.internal.AirbyteSource;
import io.airbyte.workers.internal.DefaultAirbyteDestination;
import io.airbyte.workers.internal.DefaultAirbyteSource;
import io.airbyte.workers.internal.EmptyAirbyteSource;
import io.airbyte.workers.internal.NamespacingMapper;
import io.airbyte.workers.internal.VersionedAirbyteMessageBufferedWriterFactory;
import io.airbyte.workers.internal.VersionedAirbyteStreamFactory;
import io.airbyte.workers.internal.book_keeping.AirbyteMessageTracker;
import io.airbyte.workers.process.AirbyteIntegrationLauncher;
import io.airbyte.workers.process.IntegrationLauncher;
import io.airbyte.workers.process.ProcessFactory;
Expand Down