Skip to content

Commit

Permalink
Further simplify metrics tracker. (#19988)
Browse files Browse the repository at this point in the history
Follow-up to #19814, which introduced the StreamStats object to consolidate and simplify some of the in-memory stats objects.

In this PR, we extend the StreamStats object to also include the emitted records and bytes.

- Make StreamStats a proper class. We cannot use a record because record fields are immutable, and we need mutable fields to accumulate counts.
- Consolidate the emitted records into StreamStats.
- Take the chance to move all the stats/metrics related classes into a book_keeping package to keep things clean.
  • Loading branch information
davinchia authored Dec 3, 2022
1 parent b7d2681 commit 990931e
Show file tree
Hide file tree
Showing 13 changed files with 99 additions and 43 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
import io.airbyte.workers.internal.AirbyteDestination;
import io.airbyte.workers.internal.AirbyteMapper;
import io.airbyte.workers.internal.AirbyteSource;
import io.airbyte.workers.internal.MessageTracker;
import io.airbyte.workers.internal.book_keeping.MessageTracker;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright (c) 2022 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.workers.internal;
package io.airbyte.workers.internal.book_keeping;

import static io.airbyte.metrics.lib.ApmTraceConstants.WORKER_OPERATION_NAME;

Expand All @@ -28,7 +28,7 @@
import io.airbyte.protocol.models.AirbyteStreamNameNamespacePair;
import io.airbyte.protocol.models.AirbyteTraceMessage;
import io.airbyte.workers.helper.FailureHelper;
import io.airbyte.workers.internal.StateMetricsTracker.StateMetricsTrackerNoStateMatchException;
import io.airbyte.workers.internal.book_keeping.StateMetricsTracker.StateMetricsTrackerNoStateMatchException;
import io.airbyte.workers.internal.state_aggregator.DefaultStateAggregator;
import io.airbyte.workers.internal.state_aggregator.StateAggregator;
import java.time.LocalDateTime;
Expand All @@ -42,6 +42,13 @@
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;

/**
* This class is responsible for stats and metadata tracking surrounding
* {@link AirbyteRecordMessage}.
* <p>
* It is not intended to perform meaningful operations - transforming, mutating, triggering
* downstream actions etc. - on specific messages.
*/
@Slf4j
public class AirbyteMessageTracker implements MessageTracker {

Expand All @@ -54,8 +61,6 @@ public class AirbyteMessageTracker implements MessageTracker {
private final HashFunction hashFunction;
private final BiMap<AirbyteStreamNameNamespacePair, Short> nameNamespacePairToIndex;
private final Map<AirbyteStreamNameNamespacePair, StreamStats> nameNamespacePairToStreamStats;
private final Map<Short, Long> streamToTotalBytesEmitted;
private final Map<Short, Long> streamToTotalRecordsEmitted;
private final StateDeltaTracker stateDeltaTracker;
private final StateMetricsTracker stateMetricsTracker;
private final List<AirbyteTraceMessage> destinationErrorTraceMessages;
Expand Down Expand Up @@ -86,11 +91,6 @@ private enum ConnectorType {
DESTINATION
}

/**
* POJO for all per-stream stats.
*/
private record StreamStats(long estimatedBytes, long emittedBytes, long estimatedRecords, long emittedRecords) {}

public AirbyteMessageTracker() {
this(new StateDeltaTracker(STATE_DELTA_TRACKER_MEMORY_LIMIT_BYTES),
new DefaultStateAggregator(new EnvVariableFeatureFlags().useStreamCapableState()),
Expand All @@ -107,8 +107,6 @@ protected AirbyteMessageTracker(final StateDeltaTracker stateDeltaTracker,
this.nameNamespacePairToIndex = HashBiMap.create();
this.hashFunction = Hashing.murmur3_32_fixed();
this.nameNamespacePairToStreamStats = new HashMap<>();
this.streamToTotalBytesEmitted = new HashMap<>();
this.streamToTotalRecordsEmitted = new HashMap<>();
this.stateDeltaTracker = stateDeltaTracker;
this.stateMetricsTracker = stateMetricsTracker;
this.nextStreamIndex = 0;
Expand Down Expand Up @@ -155,17 +153,19 @@ private void handleSourceEmittedRecord(final AirbyteRecordMessage recordMessage)
stateMetricsTracker.setFirstRecordReceivedAt(LocalDateTime.now());
}

final short streamIndex = getStreamIndex(AirbyteStreamNameNamespacePair.fromRecordMessage(recordMessage));
final var nameNamespace = AirbyteStreamNameNamespacePair.fromRecordMessage(recordMessage);
final short streamIndex = getStreamIndex(nameNamespace);

final long currentRunningCount = streamToRunningCount.getOrDefault(streamIndex, 0L);
streamToRunningCount.put(streamIndex, currentRunningCount + 1);

final long currentTotalCount = streamToTotalRecordsEmitted.getOrDefault(streamIndex, 0L);
streamToTotalRecordsEmitted.put(streamIndex, currentTotalCount + 1);
final var currStats = nameNamespacePairToStreamStats.getOrDefault(nameNamespace, new StreamStats());
currStats.emittedRecords++;

final int estimatedNumBytes = Jsons.getEstimatedByteSize(recordMessage.getData());
final long currentTotalStreamBytes = streamToTotalBytesEmitted.getOrDefault(streamIndex, 0L);
streamToTotalBytesEmitted.put(streamIndex, currentTotalStreamBytes + estimatedNumBytes);
currStats.emittedBytes += estimatedNumBytes;

nameNamespacePairToStreamStats.put(nameNamespace, currStats);
}

/**
Expand Down Expand Up @@ -296,9 +296,11 @@ private void handleEmittedEstimateTrace(final AirbyteEstimateTraceMessage estima
Preconditions.checkArgument(totalRecordsEstimatedSync == null, "STREAM and SYNC estimates should not be emitted in the same sync.");

log.debug("Saving stream estimates for namespace: {}, stream: {}", estimate.getNamespace(), estimate.getName());
nameNamespacePairToStreamStats.put(
new AirbyteStreamNameNamespacePair(estimate.getName(), estimate.getNamespace()),
new StreamStats(estimate.getByteEstimate(), 0L, estimate.getRowEstimate(), 0L));
final var nameNamespace = new AirbyteStreamNameNamespacePair(estimate.getName(), estimate.getNamespace());
final var currStats = nameNamespacePairToStreamStats.getOrDefault(nameNamespace, new StreamStats());
currStats.estimatedRecords = estimate.getRowEstimate();
currStats.estimatedBytes = estimate.getByteEstimate();
nameNamespacePairToStreamStats.put(nameNamespace, currStats);
}
case SYNC -> {
Preconditions.checkArgument(nameNamespacePairToStreamStats.isEmpty(), "STREAM and SYNC estimates should not be emitted in the same sync.");
Expand Down Expand Up @@ -404,8 +406,8 @@ public Optional<Map<AirbyteStreamNameNamespacePair, Long>> getStreamToCommittedR
*/
@Override
public Map<AirbyteStreamNameNamespacePair, Long> getStreamToEmittedRecords() {
return streamToTotalRecordsEmitted.entrySet().stream().collect(Collectors.toMap(
entry -> nameNamespacePairToIndex.inverse().get(entry.getKey()), Entry::getValue));
return nameNamespacePairToStreamStats.entrySet().stream().collect(Collectors.toMap(
Entry::getKey, entry -> entry.getValue().emittedRecords));
}

/**
Expand All @@ -416,16 +418,17 @@ public Map<AirbyteStreamNameNamespacePair, Long> getStreamToEstimatedRecords() {
return nameNamespacePairToStreamStats.entrySet().stream().collect(
Collectors.toMap(
Entry::getKey,
entry -> entry.getValue().estimatedRecords()));
entry -> entry.getValue().estimatedRecords));
}

/**
* Swap out stream indices for stream names and return total bytes emitted by stream.
*/
@Override
public Map<AirbyteStreamNameNamespacePair, Long> getStreamToEmittedBytes() {
return streamToTotalBytesEmitted.entrySet().stream().collect(Collectors.toMap(
entry -> nameNamespacePairToIndex.inverse().get(entry.getKey()), Entry::getValue));
return nameNamespacePairToStreamStats.entrySet().stream().collect(Collectors.toMap(
Entry::getKey,
entry -> entry.getValue().emittedBytes));
}

/**
Expand All @@ -436,15 +439,17 @@ public Map<AirbyteStreamNameNamespacePair, Long> getStreamToEstimatedBytes() {
return nameNamespacePairToStreamStats.entrySet().stream().collect(
Collectors.toMap(
Entry::getKey,
entry -> entry.getValue().estimatedBytes()));
entry -> entry.getValue().estimatedBytes));
}

/**
* Compute sum of emitted record counts across all streams.
*/
@Override
public long getTotalRecordsEmitted() {
return streamToTotalRecordsEmitted.values().stream().reduce(0L, Long::sum);
return nameNamespacePairToStreamStats.values().stream()
.map(stats -> stats.emittedRecords)
.reduce(0L, Long::sum);
}

/**
Expand All @@ -466,7 +471,9 @@ public long getTotalRecordsEstimated() {
*/
@Override
public long getTotalBytesEmitted() {
return streamToTotalBytesEmitted.values().stream().reduce(0L, Long::sum);
return nameNamespacePairToStreamStats.values().stream()
.map(e -> e.emittedBytes)
.reduce(0L, Long::sum);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright (c) 2022 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.workers.internal;
package io.airbyte.workers.internal.book_keeping;

import io.airbyte.config.FailureReason;
import io.airbyte.config.State;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright (c) 2022 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.workers.internal;
package io.airbyte.workers.internal.book_keeping;

import static io.airbyte.metrics.lib.ApmTraceConstants.WORKER_OPERATION_NAME;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright (c) 2022 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.workers.internal;
package io.airbyte.workers.internal.book_keeping;

import io.airbyte.protocol.models.AirbyteStateMessage;
import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* Copyright (c) 2022 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.workers.internal.book_keeping;

import java.util.Objects;

/**
* POJO for all per-stream stats.
* <p>
* We are not able to use a {@link Record} since we want non-final fields to accumulate counts.
*/
public class StreamStats {

public long estimatedRecords;
public long estimatedBytes;
public long emittedRecords;
public long emittedBytes;

public StreamStats() {
this(0L, 0L, 0L, 0L);
}

public StreamStats(final long estimatedBytes, final long emittedBytes, final long estimatedRecords, final long emittedRecords) {
this.estimatedRecords = estimatedRecords;
this.estimatedBytes = estimatedBytes;
this.emittedRecords = emittedRecords;
this.emittedBytes = emittedBytes;
}

@Override
public boolean equals(final Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
final StreamStats that = (StreamStats) o;
return estimatedRecords == that.estimatedRecords && estimatedBytes == that.estimatedBytes && emittedRecords == that.emittedRecords
&& emittedBytes == that.emittedBytes;
}

@Override
public int hashCode() {
return Objects.hash(estimatedRecords, estimatedBytes, emittedRecords, emittedBytes);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,9 @@
import io.airbyte.workers.exception.WorkerException;
import io.airbyte.workers.helper.FailureHelper;
import io.airbyte.workers.internal.AirbyteDestination;
import io.airbyte.workers.internal.AirbyteMessageTracker;
import io.airbyte.workers.internal.AirbyteSource;
import io.airbyte.workers.internal.NamespacingMapper;
import io.airbyte.workers.internal.book_keeping.AirbyteMessageTracker;
import io.airbyte.workers.test_utils.AirbyteMessageUtils;
import io.airbyte.workers.test_utils.TestConfigHelpers;
import java.io.IOException;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright (c) 2022 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.workers.internal;
package io.airbyte.workers.internal.book_keeping;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
Expand All @@ -15,7 +15,7 @@
import io.airbyte.protocol.models.AirbyteMessage;
import io.airbyte.protocol.models.AirbyteStreamNameNamespacePair;
import io.airbyte.workers.helper.FailureHelper;
import io.airbyte.workers.internal.StateDeltaTracker.StateDeltaTrackerException;
import io.airbyte.workers.internal.book_keeping.StateDeltaTracker.StateDeltaTrackerException;
import io.airbyte.workers.internal.state_aggregator.StateAggregator;
import io.airbyte.workers.test_utils.AirbyteMessageUtils;
import java.util.HashMap;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
* Copyright (c) 2022 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.workers.internal;
package io.airbyte.workers.internal.book_keeping;

import io.airbyte.workers.internal.StateDeltaTracker.StateDeltaTrackerException;
import io.airbyte.workers.internal.book_keeping.StateDeltaTracker.StateDeltaTrackerException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@
* Copyright (c) 2022 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.workers.internal;
package io.airbyte.workers.internal.book_keeping;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;

import io.airbyte.protocol.models.AirbyteMessage;
import io.airbyte.protocol.models.AirbyteStateMessage;
import io.airbyte.workers.internal.StateMetricsTracker.StateMetricsTrackerNoStateMatchException;
import io.airbyte.workers.internal.StateMetricsTracker.StateMetricsTrackerOomException;
import io.airbyte.workers.internal.book_keeping.StateMetricsTracker.StateMetricsTrackerNoStateMatchException;
import io.airbyte.workers.internal.book_keeping.StateMetricsTracker.StateMetricsTrackerOomException;
import io.airbyte.workers.test_utils.AirbyteMessageUtils;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
import io.airbyte.workers.WorkerMetricReporter;
import io.airbyte.workers.WorkerUtils;
import io.airbyte.workers.general.DefaultReplicationWorker;
import io.airbyte.workers.internal.AirbyteMessageTracker;
import io.airbyte.workers.internal.AirbyteStreamFactory;
import io.airbyte.workers.internal.DefaultAirbyteDestination;
import io.airbyte.workers.internal.DefaultAirbyteSource;
Expand All @@ -39,6 +38,7 @@
import io.airbyte.workers.internal.NamespacingMapper;
import io.airbyte.workers.internal.VersionedAirbyteMessageBufferedWriterFactory;
import io.airbyte.workers.internal.VersionedAirbyteStreamFactory;
import io.airbyte.workers.internal.book_keeping.AirbyteMessageTracker;
import io.airbyte.workers.process.AirbyteIntegrationLauncher;
import io.airbyte.workers.process.KubePodProcess;
import io.airbyte.workers.process.ProcessFactory;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@ public AttemptHandler(JobPersistence jobPersistence) {
this.jobPersistence = jobPersistence;
}

public InternalOperationResult setWorkflowInAttempt(
SetWorkflowInAttemptRequestBody requestBody) {
public InternalOperationResult setWorkflowInAttempt(SetWorkflowInAttemptRequestBody requestBody) {
try {
jobPersistence.setAttemptTemporalWorkflowInfo(requestBody.getJobId(),
requestBody.getAttemptNumber(), requestBody.getWorkflowId().toString(), requestBody.getProcessingTaskQueue());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,14 @@
import io.airbyte.workers.WorkerMetricReporter;
import io.airbyte.workers.WorkerUtils;
import io.airbyte.workers.general.DefaultReplicationWorker;
import io.airbyte.workers.internal.AirbyteMessageTracker;
import io.airbyte.workers.internal.AirbyteSource;
import io.airbyte.workers.internal.DefaultAirbyteDestination;
import io.airbyte.workers.internal.DefaultAirbyteSource;
import io.airbyte.workers.internal.EmptyAirbyteSource;
import io.airbyte.workers.internal.NamespacingMapper;
import io.airbyte.workers.internal.VersionedAirbyteMessageBufferedWriterFactory;
import io.airbyte.workers.internal.VersionedAirbyteStreamFactory;
import io.airbyte.workers.internal.book_keeping.AirbyteMessageTracker;
import io.airbyte.workers.process.AirbyteIntegrationLauncher;
import io.airbyte.workers.process.IntegrationLauncher;
import io.airbyte.workers.process.ProcessFactory;
Expand Down

0 comments on commit 990931e

Please sign in to comment.