-
Notifications
You must be signed in to change notification settings - Fork 4.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Avro parser: return Decimal fields as strings #29182
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -66,7 +66,7 @@ async def infer_schema( | |
def _convert_avro_type_to_json(cls, avro_format: AvroFormat, field_name: str, avro_field: str) -> Mapping[str, Any]: | ||
if isinstance(avro_field, str) and avro_field in AVRO_TYPE_TO_JSON_TYPE: | ||
# Legacy behavior to retain backwards compatibility. Long term we should always represent doubles as strings | ||
if avro_field == "double" and avro_format.decimal_as_float: | ||
if avro_field == "double" and not avro_format.double_as_string: | ||
return {"type": "number"} | ||
return {"type": AVRO_TYPE_TO_JSON_TYPE[avro_field]} | ||
if isinstance(avro_field, Mapping): | ||
|
@@ -152,10 +152,18 @@ def file_read_mode(self) -> FileReadMode: | |
@staticmethod | ||
def _to_output_value(avro_format: AvroFormat, record_type: Mapping[str, Any], record_value: Any) -> Any: | ||
if not isinstance(record_type, Mapping): | ||
if record_type == "double" and not avro_format.decimal_as_float: | ||
if record_type == "double" and avro_format.double_as_string: | ||
return str(record_value) | ||
return record_value | ||
if record_type.get("logicalType") == "uuid": | ||
return uuid.UUID(bytes=record_value) | ||
elif record_type.get("logicalType") == "decimal": | ||
return str(record_value) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
elif record_type.get("logicalType") == "date": | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. explicitly convert the output to the format we want instead of returning a date/datetime |
||
return record_value.isoformat() | ||
elif record_type.get("logicalType") == "local-timestamp-millis": | ||
return record_value.isoformat(sep="T", timespec="milliseconds") | ||
elif record_type.get("logicalType") == "local-timestamp-micros": | ||
return record_value.isoformat(sep="T", timespec="microseconds") | ||
else: | ||
return record_value |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -398,7 +398,7 @@ | |
"drummer": "George Daniel", | ||
}, | ||
"col_fixed": "\x12\x34\x56\x78", | ||
"col_decimal": 1234.56789, | ||
"col_decimal": "1234.56789", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. decimals should always be read as strings |
||
"col_uuid": "123e4567-e89b-12d3-a456-426655440000", | ||
"col_date": "2022-05-29", | ||
"col_time_millis": "06:00:00.456000", | ||
|
@@ -623,9 +623,9 @@ | |
) | ||
).build() | ||
|
||
avro_file_with_decimal_as_float_scenario = ( | ||
avro_file_with_double_as_number_scenario = ( | ||
TestScenarioBuilder() | ||
.set_name("avro_file_with_decimal_as_float_stream") | ||
.set_name("avro_file_with_double_as_number_stream") | ||
.set_config( | ||
{ | ||
"streams": [ | ||
|
@@ -637,7 +637,7 @@ | |
"format": { | ||
"avro": { | ||
"filetype": "avro", | ||
"decimal_as_float": True | ||
"double_as_string": False | ||
} | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
And to confirm. For legacy S3 connectors, the adapter will by default set this to False if I understand correct.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, created an issue #29225