Skip to content

Commit 8bc44a7

Browse files
authored
fix: Add support for unsigned Arrow datatypes in schema conversion (#1617)
## Which issue does this PR close?

<!-- We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. For example `Closes #123` indicates that this PR will close issue #123. -->

- Closes #675

## What changes are included in this PR?

<!-- Provide a summary of the modifications in this PR. List the main changes such as new features, bug fixes, refactoring, or any other updates. -->

### Bug Fixes

- Fixed crash when ArrowSchemaConverter encounters unsigned datatypes
- Resolved the generic "Unsupported Arrow data type" errors for UInt8/16/32/64 (UInt64 is still rejected, but now with a dedicated error that suggests alternatives)

### Features

- Added casting support for unsigned Arrow types
  - UInt8/16 → Int32 (safe casting to larger signed type)
  - UInt32 → Int64 (safe casting to larger signed type)
  - UInt64 → Error (no safe casting option, explicit error with guidance)

### Code Changes

- Enhanced ArrowSchemaConverter primitive() method with unsigned type handling
- Added comprehensive test: test_unsigned_integer_type_conversion() for all unsigned variants

### Files Modified

- `crates/iceberg/src/arrow/schema.rs`

## Impact

- ✅ No breaking changes - existing functionality preserved
- ✅ Safe type casting prevents overflow issues
- ✅ Clear error messages for unsupported UInt64 with alternatives
- ✅ Follows proven PyIceberg implementation approach

## Are these changes tested?

- All existing schema tests pass
- New comprehensive test covers UInt8, UInt16, UInt32, UInt64 conversion behavior
- Test verifies proper casting: UInt8/16→Int32, UInt32→Int64, UInt64→Error

<!-- Specify what test covers (unit test, integration test, etc.). If tests are not included in your PR, please explain why (for example, are they covered by existing tests)? -->
1 parent 8f288ab commit 8bc44a7

File tree

1 file changed

+52
-0
lines changed

1 file changed

+52
-0
lines changed

crates/iceberg/src/arrow/schema.rs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,16 @@ impl ArrowSchemaVisitor for ArrowSchemaConverter {
378378
DataType::Int8 | DataType::Int16 | DataType::Int32 => {
379379
Ok(Type::Primitive(PrimitiveType::Int))
380380
}
381+
DataType::UInt8 | DataType::UInt16 => Ok(Type::Primitive(PrimitiveType::Int)),
382+
DataType::UInt32 => Ok(Type::Primitive(PrimitiveType::Long)),
381383
DataType::Int64 => Ok(Type::Primitive(PrimitiveType::Long)),
384+
DataType::UInt64 => {
385+
// Block uint64 - no safe casting option
386+
Err(Error::new(
387+
ErrorKind::DataInvalid,
388+
"UInt64 is not supported. Use Int64 for values ≤ 9,223,372,036,854,775,807 or Decimal(20,0) for full uint64 range.",
389+
))
390+
}
382391
DataType::Float32 => Ok(Type::Primitive(PrimitiveType::Float)),
383392
DataType::Float64 => Ok(Type::Primitive(PrimitiveType::Double)),
384393
DataType::Decimal128(p, s) => Type::decimal(*p as u32, *s as u32).map_err(|e| {
@@ -1717,6 +1726,49 @@ mod tests {
17171726
}
17181727
}
17191728

1729+
/// Checks how `arrow_schema_to_schema` handles unsigned Arrow integer types:
/// `UInt8` and `UInt16` are expected to map to `PrimitiveType::Int`, `UInt32` to
/// `PrimitiveType::Long`, and `UInt64` is expected to fail with an error whose
/// message contains "UInt64 is not supported".
#[test]
1730+
fn test_unsigned_integer_type_conversion() {
1731+
// (Arrow input type, expected Iceberg primitive type) pairs that must convert successfully.
let test_cases = vec![
1732+
(DataType::UInt8, PrimitiveType::Int),
1733+
(DataType::UInt16, PrimitiveType::Int),
1734+
(DataType::UInt32, PrimitiveType::Long),
1735+
];
1736+
1737+
for (arrow_type, expected_iceberg_type) in test_cases {
1738+
// Single non-nullable field named "test" carrying id "1" in its metadata under
// PARQUET_FIELD_ID_META_KEY (presumably required by the conversion; confirm).
let arrow_field = Field::new("test", arrow_type.clone(), false).with_metadata(
1739+
HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(), "1".to_string())]),
1740+
);
1741+
let arrow_schema = ArrowSchema::new(vec![arrow_field]);
1742+
1743+
// Conversion must succeed for these types; `unwrap` fails the test otherwise.
let iceberg_schema = arrow_schema_to_schema(&arrow_schema).unwrap();
1744+
let iceberg_field = iceberg_schema.as_struct().fields().first().unwrap();
1745+
1746+
// The only field of the converted schema must be the expected primitive type.
assert!(
1747+
matches!(iceberg_field.field_type.as_ref(), Type::Primitive(t) if *t == expected_iceberg_type),
1748+
"Expected {:?} to map to {:?}",
1749+
arrow_type,
1750+
expected_iceberg_type
1751+
);
1752+
}
1753+
1754+
// Test UInt64 blocking
1755+
{
1756+
// Same single-field schema shape as above, but with the rejected UInt64 type.
let arrow_field = Field::new("test", DataType::UInt64, false).with_metadata(
1757+
HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(), "1".to_string())]),
1758+
);
1759+
let arrow_schema = ArrowSchema::new(vec![arrow_field]);
1760+
1761+
// Conversion must return an error, and the message must name the unsupported type.
let result = arrow_schema_to_schema(&arrow_schema);
1762+
assert!(result.is_err());
1763+
assert!(
1764+
result
1765+
.unwrap_err()
1766+
.to_string()
1767+
.contains("UInt64 is not supported")
1768+
);
1769+
}
1770+
}
1771+
17201772
#[test]
17211773
fn test_datum_conversion() {
17221774
{

0 commit comments

Comments
 (0)