-
Notifications
You must be signed in to change notification settings - Fork 4.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
✨ Introduce StateIteratorProcessor in CDK #33312
Merged
Merged
Changes from 7 commits
Commits
Show all changes
18 commits
Select commit
Hold shift + click to select a range
2cd3b62
save work
xiaohansong ab8be59
Merge remote-tracking branch 'origin/master' into xiaohan/stateiterator
xiaohansong f8b2b84
add comment on interface
xiaohansong 27e3713
delete unrelated merge change
xiaohansong f9a270e
add unit test for sourceStateIterator
xiaohansong e65713f
cdk change - support ctid
xiaohansong 34999c7
mysql related change, and support ctid
xiaohansong 2cb6ac0
refactor - renaming processor to manager
xiaohansong b2169e1
Merge branch 'master' into xiaohan/stateiterator
xiaohansong 40612ed
Merge remote-tracking branch 'origin/master' into xiaohan/stateiterator
xiaohansong 48d2fa9
format
xiaohansong ebe365d
Merge remote-tracking branch 'origin/master' into xiaohan/stateiterator
xiaohansong 8441bab
readme
xiaohansong 7cb5932
fix test
xiaohansong 1ae7c0f
more formatting
xiaohansong 6bf24e7
use latest cdk
xiaohansong cd06b0c
update mysql version
xiaohansong 5667e3f
Merge branch 'master' into xiaohan/stateiterator
xiaohansong File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
70 changes: 70 additions & 0 deletions
70
.../main/java/io/airbyte/cdk/integrations/source/relationaldb/state/SourceStateIterator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
package io.airbyte.cdk.integrations.source.relationaldb.state; | ||
|
||
import com.google.common.collect.AbstractIterator; | ||
import io.airbyte.protocol.models.v0.AirbyteMessage; | ||
import io.airbyte.protocol.models.v0.AirbyteMessage.Type; | ||
import io.airbyte.protocol.models.v0.AirbyteStateMessage; | ||
import io.airbyte.protocol.models.v0.AirbyteStateStats; | ||
import java.time.Instant; | ||
import java.util.Iterator; | ||
import javax.annotation.CheckForNull; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
public class SourceStateIterator<T> extends AbstractIterator<AirbyteMessage> implements Iterator<AirbyteMessage> { | ||
private static final Logger LOGGER = LoggerFactory.getLogger(SourceStateIterator.class); | ||
private final Iterator<T> messageIterator; | ||
private boolean hasEmittedFinalState = false; | ||
private long recordCount = 0L; | ||
private Instant lastCheckpoint = Instant.now(); | ||
|
||
private final SourceStateIteratorProcessor sourceStateIteratorProcessor; | ||
|
||
public SourceStateIterator(final Iterator<T> messageIterator, | ||
final SourceStateIteratorProcessor sourceStateIteratorProcessor) { | ||
this.messageIterator = messageIterator; | ||
this.sourceStateIteratorProcessor = sourceStateIteratorProcessor; | ||
} | ||
|
||
@CheckForNull | ||
@Override | ||
protected AirbyteMessage computeNext() { | ||
boolean iteratorHasNextValue = false; | ||
try { | ||
iteratorHasNextValue = messageIterator.hasNext(); | ||
} catch (Exception ex) { | ||
LOGGER.info("Caught exception while trying to get the next from message iterator. Treating hasNext to false. ", ex); | ||
} | ||
if (iteratorHasNextValue) { | ||
if (sourceStateIteratorProcessor.shouldEmitStateMessage(recordCount, lastCheckpoint)) { | ||
AirbyteStateMessage stateMessage = sourceStateIteratorProcessor.generateStateMessageAtCheckpoint(); | ||
stateMessage.withSourceStats(new AirbyteStateStats().withRecordCount((double) recordCount)); | ||
|
||
recordCount = 0L; | ||
lastCheckpoint = Instant.now(); | ||
return new AirbyteMessage() | ||
.withType(Type.STATE) | ||
.withState(stateMessage); | ||
} | ||
// Use try-catch to catch Exception that could occur when connection to the database fails | ||
try { | ||
final T message = messageIterator.next(); | ||
final AirbyteMessage processedMessage = sourceStateIteratorProcessor.processRecordMessage(message); | ||
recordCount++; | ||
return processedMessage; | ||
} catch (final Exception e) { | ||
throw new RuntimeException(e); | ||
} | ||
} else if (!hasEmittedFinalState) { | ||
hasEmittedFinalState = true; | ||
final AirbyteStateMessage finalStateMessage = sourceStateIteratorProcessor.createFinalStateMessage(); | ||
return new AirbyteMessage() | ||
.withType(Type.STATE) | ||
.withState(finalStateMessage); | ||
} else { | ||
return endOfData(); | ||
} | ||
} | ||
|
||
|
||
} |
30 changes: 30 additions & 0 deletions
30
...a/io/airbyte/cdk/integrations/source/relationaldb/state/SourceStateIteratorProcessor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
package io.airbyte.cdk.integrations.source.relationaldb.state; | ||
|
||
import io.airbyte.protocol.models.v0.AirbyteMessage; | ||
import io.airbyte.protocol.models.v0.AirbyteMessage.Type; | ||
import io.airbyte.protocol.models.v0.AirbyteStateMessage; | ||
import java.time.Instant; | ||
|
||
public interface SourceStateIteratorProcessor<T> { | ||
/** | ||
* Returns a state message that should be emitted at checkpoint. | ||
*/ | ||
AirbyteStateMessage generateStateMessageAtCheckpoint(); | ||
|
||
/** | ||
* For the incoming record message, this method defines how the connector will consume it. | ||
*/ | ||
AirbyteMessage processRecordMessage(final T message); | ||
|
||
/** | ||
* At the end of the iteration, this method will be called and it will generate the final state message. | ||
* @return | ||
*/ | ||
AirbyteStateMessage createFinalStateMessage(); | ||
|
||
/** | ||
* Determines if the iterator has reached checkpoint or not, | ||
* based on the time and number of record messages it has been processed since the last checkpoint. | ||
*/ | ||
boolean shouldEmitStateMessage(final long recordCount, final Instant lastCheckpoint); | ||
} |
87 changes: 87 additions & 0 deletions
87
...t/java/io/airbyte/cdk/integrations/source/relationaldb/state/SourceStateIteratorTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
package io.airbyte.cdk.integrations.source.relationaldb.state; | ||
|
||
import static org.junit.Assert.assertEquals; | ||
import static org.mockito.ArgumentMatchers.any; | ||
import static org.mockito.ArgumentMatchers.anyLong; | ||
import static org.mockito.ArgumentMatchers.eq; | ||
import static org.mockito.Mockito.atLeastOnce; | ||
import static org.mockito.Mockito.doNothing; | ||
import static org.mockito.Mockito.doReturn; | ||
import static org.mockito.Mockito.mock; | ||
import static org.mockito.Mockito.verify; | ||
import static org.mockito.Mockito.when; | ||
|
||
import io.airbyte.protocol.models.v0.AirbyteStateMessage; | ||
import io.airbyte.protocol.models.v0.AirbyteStateStats; | ||
import io.airbyte.protocol.models.v0.AirbyteMessage; | ||
import io.airbyte.protocol.models.v0.AirbyteMessage.Type; | ||
import io.airbyte.protocol.models.v0.AirbyteRecordMessage; | ||
import java.util.Iterator; | ||
import org.junit.jupiter.api.BeforeEach; | ||
import org.junit.jupiter.api.Test; | ||
|
||
public class SourceStateIteratorTest { | ||
|
||
SourceStateIteratorProcessor mockProcessor; | ||
Iterator<AirbyteMessage> messageIterator; | ||
|
||
SourceStateIterator sourceStateIterator; | ||
|
||
@BeforeEach | ||
void setup() { | ||
mockProcessor = mock(SourceStateIteratorProcessor.class); | ||
messageIterator = mock(Iterator.class); | ||
sourceStateIterator = new SourceStateIterator(messageIterator, mockProcessor); | ||
} | ||
|
||
// Provides a way to generate a record message and will verify corresponding spied functions have been called. | ||
void processRecordMessage() { | ||
doReturn(true).when(messageIterator).hasNext(); | ||
doReturn(false).when(mockProcessor).shouldEmitStateMessage(anyLong(), any()); | ||
doNothing().when(mockProcessor).processRecordMessage(any()); | ||
AirbyteMessage message = new AirbyteMessage().withType(Type.RECORD).withRecord(new AirbyteRecordMessage()); | ||
doReturn(message).when(messageIterator).next(); | ||
|
||
assertEquals(message, sourceStateIterator.computeNext()); | ||
verify(mockProcessor, atLeastOnce()).processRecordMessage(message); | ||
verify(mockProcessor, atLeastOnce()).shouldEmitStateMessage(eq(0L), any()); | ||
} | ||
@Test | ||
void testShouldProcessRecordMessage() { | ||
processRecordMessage(); | ||
} | ||
|
||
@Test | ||
void testShouldEmitStateMessage() { | ||
processRecordMessage(); | ||
doReturn(true).when(mockProcessor).shouldEmitStateMessage(anyLong(), any()); | ||
final AirbyteStateMessage stateMessage = new AirbyteStateMessage(); | ||
doReturn(stateMessage).when(mockProcessor).generateStateMessageAtCheckpoint(); | ||
AirbyteMessage expectedMessage = new AirbyteMessage().withType(Type.STATE).withState(stateMessage); | ||
expectedMessage.getState().withSourceStats(new AirbyteStateStats().withRecordCount(1.0)); | ||
assertEquals(expectedMessage, sourceStateIterator.computeNext()); | ||
} | ||
|
||
@Test | ||
void testShouldEmitFinalStateMessage() { | ||
processRecordMessage(); | ||
processRecordMessage(); | ||
doReturn(false).when(messageIterator).hasNext(); | ||
final AirbyteStateMessage stateMessage = new AirbyteStateMessage(); | ||
doReturn(stateMessage).when(mockProcessor).createFinalStateMessage(); | ||
AirbyteMessage expectedMessage = new AirbyteMessage().withType(Type.STATE).withState(stateMessage); | ||
expectedMessage.getState().withSourceStats(new AirbyteStateStats().withRecordCount(2.0)); | ||
assertEquals(expectedMessage, sourceStateIterator.computeNext()); | ||
} | ||
|
||
@Test | ||
void testShouldSendEndOfData() { | ||
processRecordMessage(); | ||
doReturn(false).when(messageIterator).hasNext(); | ||
doReturn(new AirbyteStateMessage()).when(mockProcessor).createFinalStateMessage(); | ||
sourceStateIterator.computeNext(); | ||
|
||
// After sending the final state, if iterator was called again, we will return null. | ||
assertEquals(null, sourceStateIterator.computeNext()); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
104 changes: 0 additions & 104 deletions
104
.../java/io/airbyte/integrations/source/mysql/initialsync/MySqlInitialSyncStateIterator.java
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looking at the implementation of this is
MySqlInitialSyncStateIteratorProcessor
- this seems to be a wrapper around StateManager? Perhaps we should rename this to SourceStateManager?A more stretch goal is to see if there is a way to combine parts of this code and the various StateManagers since there is definitely some duplicate code (move certain fields to generics)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Discussed offline and we decide to keep this as is-
I didn’t make the stateManager change, because: