-
Notifications
You must be signed in to change notification settings - Fork 4.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Proof of concept parallel source stream reading implementation for MySQL (#26580)
* Proof of concept parallel source stream reading implementation for MySQL
* Automated Change
* Add read method that supports concurrent execution to Source interface
* Remove parallel iterator
* Ensure that executor service is stopped
* Automated Commit - Format and Process Resources Changes
* Expose method to fix compilation issue
* Use concurrent map to avoid access issues
* Automated Commit - Format and Process Resources Changes
* Ensure concurrent streams finish before closing source
* Fix compile issue
* Formatting
* Exclude concurrent stream threads from orphan thread watcher
* Automated Commit - Format and Process Resources Changes
* Refactor orphaned thread logic to account for concurrent execution
* PR feedback
* Implement readStreams in wrapper source
* Automated Commit - Format and Process Resources Changes
* Add readStream override
* Automated Commit - Format and Process Resources Changes
* 🤖 Auto format source-mysql code [skip ci]
* 🤖 Auto format source-mysql code [skip ci]
* 🤖 Auto format source-mysql code [skip ci]
* 🤖 Auto format source-mysql code [skip ci]
* 🤖 Auto format source-mysql code [skip ci]
* Debug logging
* Reduce logging level
* Replace synchronized calls to System.out.println when concurrent
* Close consumer
* Flush before close
* Automated Commit - Format and Process Resources Changes
* Remove charset
* Use ASCII and flush periodically for parallel streams
* Test performance harness patch
* Automated Commit - Format and Process Resources Changes
* Cleanup
* Logging to identify concurrent read enabled
* Mark parameter as final
---------
Co-authored-by: jdpgrailsdev <jdpgrailsdev@users.noreply.github.com>
Co-authored-by: octavia-squidington-iii <octavia-squidington-iii@users.noreply.github.com>
Co-authored-by: Rodi Reich Zilberman <867491+rodireich@users.noreply.github.com>
Co-authored-by: rodireich <rodireich@users.noreply.github.com>
- Loading branch information
1 parent
2f7deae
commit 549e36f
Showing
22 changed files
with
1,391 additions
and
178 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
226 changes: 226 additions & 0 deletions
226
airbyte-commons/src/main/java/io/airbyte/commons/stream/StreamStatusUtils.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,226 @@ | ||
/* | ||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved. | ||
*/ | ||
|
||
package io.airbyte.commons.stream; | ||
|
||
import io.airbyte.commons.util.AirbyteStreamAware; | ||
import io.airbyte.commons.util.AutoCloseableIterator; | ||
import io.airbyte.protocol.models.AirbyteStreamNameNamespacePair; | ||
import io.airbyte.protocol.models.v0.AirbyteMessage; | ||
import io.airbyte.protocol.models.v0.AirbyteStreamStatusTraceMessage.AirbyteStreamStatus; | ||
import java.util.Optional; | ||
import java.util.function.Consumer; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
/** | ||
* Collection of utility methods that support the generation of stream status updates. | ||
*/ | ||
public class StreamStatusUtils { | ||
|
||
private static final Logger LOGGER = LoggerFactory.getLogger(StreamStatusUtils.class); | ||
|
||
/** | ||
* Creates a new {@link Consumer} that wraps the provided {@link Consumer} with stream status | ||
* reporting capabilities. Specifically, this consumer will emit an | ||
* {@link AirbyteStreamStatus#RUNNING} status after the first message is consumed by the delegated | ||
* {@link Consumer}. | ||
* | ||
* @param stream The stream from which the delegating {@link Consumer} will consume messages for | ||
* processing. | ||
* @param delegateRecordCollector The delegated {@link Consumer} that will be called when this | ||
* consumer accepts a message for processing. | ||
* @param streamStatusEmitter The optional {@link Consumer} that will be used to emit stream status | ||
* updates. | ||
* @return A wrapping {@link Consumer} that provides stream status updates when the provided | ||
* delegate {@link Consumer} is invoked. | ||
*/ | ||
public static Consumer<AirbyteMessage> statusTrackingRecordCollector(final AutoCloseableIterator<AirbyteMessage> stream, | ||
final Consumer<AirbyteMessage> delegateRecordCollector, | ||
final Optional<Consumer<AirbyteStreamStatusHolder>> streamStatusEmitter) { | ||
return new Consumer<>() { | ||
|
||
private boolean firstRead = true; | ||
|
||
@Override | ||
public void accept(final AirbyteMessage airbyteMessage) { | ||
try { | ||
delegateRecordCollector.accept(airbyteMessage); | ||
} finally { | ||
if (firstRead) { | ||
emitRunningStreamStatus(stream, streamStatusEmitter); | ||
firstRead = false; | ||
} | ||
} | ||
} | ||
|
||
}; | ||
} | ||
|
||
/** | ||
* Emits a {@link AirbyteStreamStatus#RUNNING} stream status for the provided stream. | ||
* | ||
* @param airbyteStream The stream that should be associated with the stream status. | ||
* @param statusEmitter The {@link Optional} stream status emitter. | ||
*/ | ||
public static void emitRunningStreamStatus(final AutoCloseableIterator<AirbyteMessage> airbyteStream, | ||
final Optional<Consumer<AirbyteStreamStatusHolder>> statusEmitter) { | ||
if (airbyteStream instanceof AirbyteStreamAware) { | ||
emitRunningStreamStatus((AirbyteStreamAware) airbyteStream, statusEmitter); | ||
} | ||
} | ||
|
||
/** | ||
* Emits a {@link AirbyteStreamStatus#RUNNING} stream status for the provided stream. | ||
* | ||
* @param airbyteStream The stream that should be associated with the stream status. | ||
* @param statusEmitter The {@link Optional} stream status emitter. | ||
*/ | ||
public static void emitRunningStreamStatus(final AirbyteStreamAware airbyteStream, | ||
final Optional<Consumer<AirbyteStreamStatusHolder>> statusEmitter) { | ||
emitRunningStreamStatus(airbyteStream.getAirbyteStream(), statusEmitter); | ||
} | ||
|
||
/** | ||
* Emits a {@link AirbyteStreamStatus#RUNNING} stream status for the provided stream. | ||
* | ||
* @param airbyteStream The stream that should be associated with the stream status. | ||
* @param statusEmitter The {@link Optional} stream status emitter. | ||
*/ | ||
public static void emitRunningStreamStatus(final Optional<AirbyteStreamNameNamespacePair> airbyteStream, | ||
final Optional<Consumer<AirbyteStreamStatusHolder>> statusEmitter) { | ||
airbyteStream.ifPresent(s -> { | ||
LOGGER.debug("RUNNING -> {}", s); | ||
emitStreamStatus(s, AirbyteStreamStatus.RUNNING, statusEmitter); | ||
}); | ||
} | ||
|
||
/** | ||
* Emits a {@link AirbyteStreamStatus#STARTED} stream status for the provided stream. | ||
* | ||
* @param airbyteStream The stream that should be associated with the stream status. | ||
* @param statusEmitter The {@link Optional} stream status emitter. | ||
*/ | ||
public static void emitStartStreamStatus(final AutoCloseableIterator<AirbyteMessage> airbyteStream, | ||
final Optional<Consumer<AirbyteStreamStatusHolder>> statusEmitter) { | ||
if (airbyteStream instanceof AirbyteStreamAware) { | ||
emitStartStreamStatus((AirbyteStreamAware) airbyteStream, statusEmitter); | ||
} | ||
} | ||
|
||
/** | ||
* Emits a {@link AirbyteStreamStatus#STARTED} stream status for the provided stream. | ||
* | ||
* @param airbyteStream The stream that should be associated with the stream status. | ||
* @param statusEmitter The {@link Optional} stream status emitter. | ||
*/ | ||
public static void emitStartStreamStatus(final AirbyteStreamAware airbyteStream, | ||
final Optional<Consumer<AirbyteStreamStatusHolder>> statusEmitter) { | ||
emitStartStreamStatus(airbyteStream.getAirbyteStream(), statusEmitter); | ||
} | ||
|
||
/** | ||
* Emits a {@link AirbyteStreamStatus#STARTED} stream status for the provided stream. | ||
* | ||
* @param airbyteStream The stream that should be associated with the stream status. | ||
* @param statusEmitter The {@link Optional} stream status emitter. | ||
*/ | ||
public static void emitStartStreamStatus(final Optional<AirbyteStreamNameNamespacePair> airbyteStream, | ||
final Optional<Consumer<AirbyteStreamStatusHolder>> statusEmitter) { | ||
airbyteStream.ifPresent(s -> { | ||
LOGGER.debug("STARTING -> {}", s); | ||
emitStreamStatus(s, AirbyteStreamStatus.STARTED, statusEmitter); | ||
}); | ||
} | ||
|
||
/** | ||
* Emits a {@link AirbyteStreamStatus#COMPLETE} stream status for the provided stream. | ||
* | ||
* @param airbyteStream The stream that should be associated with the stream status. | ||
* @param statusEmitter The {@link Optional} stream status emitter. | ||
*/ | ||
public static void emitCompleteStreamStatus(final AutoCloseableIterator<AirbyteMessage> airbyteStream, | ||
final Optional<Consumer<AirbyteStreamStatusHolder>> statusEmitter) { | ||
if (airbyteStream instanceof AirbyteStreamAware) { | ||
emitCompleteStreamStatus((AirbyteStreamAware) airbyteStream, statusEmitter); | ||
} | ||
} | ||
|
||
/** | ||
* Emits a {@link AirbyteStreamStatus#COMPLETE} stream status for the provided stream. | ||
* | ||
* @param airbyteStream The stream that should be associated with the stream status. | ||
* @param statusEmitter The {@link Optional} stream status emitter. | ||
*/ | ||
public static void emitCompleteStreamStatus(final AirbyteStreamAware airbyteStream, | ||
final Optional<Consumer<AirbyteStreamStatusHolder>> statusEmitter) { | ||
emitCompleteStreamStatus(airbyteStream.getAirbyteStream(), statusEmitter); | ||
} | ||
|
||
/** | ||
* Emits a {@link AirbyteStreamStatus#COMPLETE} stream status for the provided stream. | ||
* | ||
* @param airbyteStream The stream that should be associated with the stream status. | ||
* @param statusEmitter The {@link Optional} stream status emitter. | ||
*/ | ||
public static void emitCompleteStreamStatus(final Optional<AirbyteStreamNameNamespacePair> airbyteStream, | ||
final Optional<Consumer<AirbyteStreamStatusHolder>> statusEmitter) { | ||
airbyteStream.ifPresent(s -> { | ||
LOGGER.debug("COMPLETE -> {}", s); | ||
emitStreamStatus(s, AirbyteStreamStatus.COMPLETE, statusEmitter); | ||
}); | ||
} | ||
|
||
/** | ||
* Emits a {@link AirbyteStreamStatus#INCOMPLETE} stream status for the provided stream. | ||
* | ||
* @param airbyteStream The stream that should be associated with the stream status. | ||
* @param statusEmitter The {@link Optional} stream status emitter. | ||
*/ | ||
public static void emitIncompleteStreamStatus(final AutoCloseableIterator<AirbyteMessage> airbyteStream, | ||
final Optional<Consumer<AirbyteStreamStatusHolder>> statusEmitter) { | ||
if (airbyteStream instanceof AirbyteStreamAware) { | ||
emitIncompleteStreamStatus((AirbyteStreamAware) airbyteStream, statusEmitter); | ||
} | ||
} | ||
|
||
/** | ||
* Emits a {@link AirbyteStreamStatus#INCOMPLETE} stream status for the provided stream. | ||
* | ||
* @param airbyteStream The stream that should be associated with the stream status. | ||
* @param statusEmitter The {@link Optional} stream status emitter. | ||
*/ | ||
public static void emitIncompleteStreamStatus(final AirbyteStreamAware airbyteStream, | ||
final Optional<Consumer<AirbyteStreamStatusHolder>> statusEmitter) { | ||
emitIncompleteStreamStatus(airbyteStream.getAirbyteStream(), statusEmitter); | ||
} | ||
|
||
/** | ||
* Emits a {@link AirbyteStreamStatus#INCOMPLETE} stream status for the provided stream. | ||
* | ||
* @param airbyteStream The stream that should be associated with the stream status. | ||
* @param statusEmitter The {@link Optional} stream status emitter. | ||
*/ | ||
public static void emitIncompleteStreamStatus(final Optional<AirbyteStreamNameNamespacePair> airbyteStream, | ||
final Optional<Consumer<AirbyteStreamStatusHolder>> statusEmitter) { | ||
airbyteStream.ifPresent(s -> { | ||
LOGGER.debug("INCOMPLETE -> {}", s); | ||
emitStreamStatus(s, AirbyteStreamStatus.INCOMPLETE, statusEmitter); | ||
}); | ||
} | ||
|
||
/** | ||
* Emits a stream status for the provided stream. | ||
* | ||
* @param airbyteStreamNameNamespacePair The stream identifier. | ||
* @param airbyteStreamStatus The status update. | ||
* @param statusEmitter The {@link Optional} stream status emitter. | ||
*/ | ||
private static void emitStreamStatus(final AirbyteStreamNameNamespacePair airbyteStreamNameNamespacePair, | ||
final AirbyteStreamStatus airbyteStreamStatus, | ||
final Optional<Consumer<AirbyteStreamStatusHolder>> statusEmitter) { | ||
statusEmitter.ifPresent(consumer -> consumer.accept(new AirbyteStreamStatusHolder(airbyteStreamNameNamespacePair, airbyteStreamStatus))); | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.