-
Notifications
You must be signed in to change notification settings - Fork 4.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
improve error message for tables with invalid columns as cursor #15317
Changes from 4 commits
d60b2ec
87258a3
5321946
667ce0a
b0a90fa
f498ce1
20c0d8e
e51470c
52431d0
682e530
9732314
39a51c3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -118,6 +118,16 @@ protected void putBoolean(final ObjectNode node, final String columnName, final | |
node.put(columnName, resultSet.getInt(index) > 0); | ||
} | ||
|
||
@Override | ||
public boolean isValidCursorType(final MysqlType cursorType) { | ||
return switch (cursorType) { | ||
case BIT, BOOLEAN, TINYINT, TINYINT_UNSIGNED, SMALLINT, SMALLINT_UNSIGNED, MEDIUMINT, MEDIUMINT_UNSIGNED, INT, INT_UNSIGNED, BIGINT, BIGINT_UNSIGNED, FLOAT, FLOAT_UNSIGNED, DOUBLE, DOUBLE_UNSIGNED, DECIMAL, DECIMAL_UNSIGNED, DATE, DATETIME, TIMESTAMP, TIME, YEAR, CHAR, VARCHAR, TINYTEXT, TEXT, MEDIUMTEXT, LONGTEXT, ENUM, SET, TINYBLOB, BLOB, MEDIUMBLOB, LONGBLOB, BINARY, VARBINARY -> true; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What do you think of this style? return switch (cursorType) {
case <mysql-specific types> -> true;
default -> super.isValidCursorType(cursorType);
}; I.e. only defining the types that aren't handled by JdbcSourceOperations already. (Not a huge win in this case, but I think it would make e.g. getJsonType much nicer.) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I like this approach as well: it reduces replication of cursor types, but it does make it harder to know the full list of supported cursor types without looking into the super class. For extensibility, though, this seems better. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nit: reorder values lexicographically for easier parsing, and also to have the "// since cursor are expected to be comparable, ..." comment in the […] |
||
// since cursor are expected to be comparable, handle cursor typing strictly and error on | ||
// unrecognized types | ||
default -> false; | ||
}; | ||
} | ||
|
||
@Override | ||
public void setStatementField(final PreparedStatement preparedStatement, | ||
final int parameterIndex, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,7 @@ | |
import io.airbyte.integrations.BaseConnector; | ||
import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; | ||
import io.airbyte.integrations.base.Source; | ||
import io.airbyte.integrations.source.relationaldb.InvalidCursorException.InvalidCursorInfo; | ||
import io.airbyte.integrations.source.relationaldb.models.DbState; | ||
import io.airbyte.integrations.source.relationaldb.state.StateManager; | ||
import io.airbyte.integrations.source.relationaldb.state.StateManagerFactory; | ||
|
@@ -50,6 +51,7 @@ | |
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Objects; | ||
import java.util.Optional; | ||
import java.util.Set; | ||
import java.util.concurrent.atomic.AtomicLong; | ||
|
@@ -125,6 +127,8 @@ public AutoCloseableIterator<AirbyteMessage> read(final JsonNode config, | |
.collect(Collectors.toMap(t -> String.format("%s.%s", t.getNameSpace(), t.getName()), Function | ||
.identity())); | ||
|
||
validateCursorFieldForIncrementalTables(fullyQualifiedTableNameToInfo, catalog); | ||
|
||
final List<AutoCloseableIterator<AirbyteMessage>> incrementalIterators = | ||
getIncrementalIterators(database, catalog, fullyQualifiedTableNameToInfo, stateManager, emittedAt); | ||
final List<AutoCloseableIterator<AirbyteMessage>> fullRefreshIterators = | ||
|
@@ -142,6 +146,41 @@ public AutoCloseableIterator<AirbyteMessage> read(final JsonNode config, | |
}); | ||
} | ||
|
||
private void validateCursorFieldForIncrementalTables(final Map<String, TableInfo<CommonField<DataType>>> tableNameToTable, final ConfiguredAirbyteCatalog catalog) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Unless I'm mistaken, shouldn't this method have […]? Another note: can we have a short javadoc comment that says something along the lines of /**
* Creates a list of incremental tables with invalid cursor columns (e.g. non-numeric types). Will also throw
* `InvalidCursorException` if at least one table includes an invalid cursor type
*/ EDIT: After chatting with Ed on this, since |
||
final List<InvalidCursorInfo> tablesWithInvalidCursor = new ArrayList<>(); | ||
for (final ConfiguredAirbyteStream airbyteStream : catalog.getStreams()) { | ||
final AirbyteStream stream = airbyteStream.getStream(); | ||
final String fullyQualifiedTableName = getFullyQualifiedTableName(stream.getNamespace(), | ||
stream.getName()); | ||
final boolean hasSourceDefinedCursor = | ||
!Objects.isNull(airbyteStream.getStream().getSourceDefinedCursor()) && airbyteStream.getStream().getSourceDefinedCursor(); | ||
if (!tableNameToTable.containsKey(fullyQualifiedTableName) || airbyteStream.getSyncMode() != SyncMode.INCREMENTAL || hasSourceDefinedCursor) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. for my understanding: is there any situation where |
||
continue; | ||
} | ||
|
||
final TableInfo<CommonField<DataType>> table = tableNameToTable | ||
.get(fullyQualifiedTableName); | ||
final String cursorField = IncrementalUtils.getCursorField(airbyteStream); | ||
final DataType cursorType = table.getFields().stream() | ||
.filter(info -> info.getName().equals(cursorField)) | ||
.map(CommonField::getType) | ||
.findFirst() | ||
.orElseThrow(); | ||
|
||
if (isValidCursorType(cursorType)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nitpick: this seems a little bit nicer (avoids using `continue`): if (!isValidCursorType(cursorType)) {
tablesWithInvalidCursor.add(new InvalidCursorInfo(fullyQualifiedTableName, cursorField, cursorType.toString()));
} There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 on this comment |
||
continue; | ||
} | ||
|
||
tablesWithInvalidCursor.add(new InvalidCursorInfo(fullyQualifiedTableName, cursorField, cursorType.toString())); | ||
} | ||
|
||
if (!tablesWithInvalidCursor.isEmpty()) { | ||
throw new InvalidCursorException(tablesWithInvalidCursor); | ||
} | ||
} | ||
|
||
protected abstract boolean isValidCursorType(final DataType cursorType); | ||
|
||
protected List<TableInfo<CommonField<DataType>>> discoverWithoutSystemTables(final Database database) throws Exception { | ||
final Set<String> systemNameSpaces = getExcludedInternalNameSpaces(); | ||
final List<TableInfo<CommonField<DataType>>> discoveredTables = discoverInternal(database); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
package io.airbyte.integrations.source.relationaldb; | ||
|
||
import java.util.List; | ||
import java.util.stream.Collectors; | ||
|
||
public class InvalidCursorException extends RuntimeException { | ||
|
||
public InvalidCursorException(final List<InvalidCursorInfo> tablesWithInvalidCursor) { | ||
super("The following tables have invalid columns selected as cursor, please select a valid column as a cursor. " + tablesWithInvalidCursor.stream().map(InvalidCursorInfo::toString) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nitpick: maybe replace "a valid column" with "a column with a well-defined ordering"? So that it's clear why the cursor isn't valid |
||
.collect(Collectors.joining(","))); | ||
} | ||
|
||
public record InvalidCursorInfo(String tableName, String cursorColumnName, String cursorSqlType) { | ||
|
||
@Override | ||
public String toString() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. tiny nitpick: I'd prefer to define a new method |
||
return "{" + | ||
"tableName='" + tableName + '\'' + | ||
", cursorColumnName='" + cursorColumnName + '\'' + | ||
", cursorSqlType=" + cursorSqlType + | ||
'}'; | ||
} | ||
} | ||
|
||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -85,6 +85,14 @@ public void setJsonField(ResultSet resultSet, int colIndex, ObjectNode json) thr | |
} | ||
} | ||
|
||
@Override | ||
public boolean isValidCursorType(final MysqlType cursorType) { | ||
return switch (cursorType) { | ||
case BIT, BOOLEAN, TINYINT, TINYINT_UNSIGNED, SMALLINT, SMALLINT_UNSIGNED, MEDIUMINT, MEDIUMINT_UNSIGNED, INT, INT_UNSIGNED, BIGINT, BIGINT_UNSIGNED, FLOAT, FLOAT_UNSIGNED, DOUBLE, DOUBLE_UNSIGNED, DECIMAL, DECIMAL_UNSIGNED, DATE, DATETIME, TIMESTAMP, TIME, YEAR, CHAR, VARCHAR, TINYTEXT, TEXT, MEDIUMTEXT, LONGTEXT, ENUM, SET, TINYBLOB, BLOB, MEDIUMBLOB, LONGBLOB, BINARY, VARBINARY -> true; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since this is a large […] EDIT: after chatting with Ed on this, I'm not hard set on lexicographic sorting, since it can also be easier for people who look at the data as grouped sets of values. We may still like to consider how "readable" this would be for someone looking to understand which types are supported. |
||
default -> false; | ||
}; | ||
} | ||
|
||
@Override | ||
public void setStatementField(PreparedStatement preparedStatement, int parameterIndex, MysqlType cursorFieldType, String value) | ||
throws SQLException { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same as previous comment, would like to see if this can be reordered lexicographically to more easily know if a cursor type is supported