Skip to content

Commit

Permalink
Simplify serialization by removing redundant PrimitiveScalarValue (a…
Browse files Browse the repository at this point in the history
…pache#3612)

* Simplify serialization by removing redundant PrimitiveScalarValue

* comments

* it compiles

* Add additional scalar value null construction

* reserve old field name
  • Loading branch information
alamb authored and Dandandan committed Nov 5, 2022
1 parent 3bf3efd commit f966d7c
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 435 deletions.
21 changes: 21 additions & 0 deletions datafusion/common/src/scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2238,6 +2238,15 @@ impl_try_from!(Float32, f32);
impl_try_from!(Float64, f64);
impl_try_from!(Boolean, bool);

impl TryFrom<DataType> for ScalarValue {
type Error = DataFusionError;

/// Create a Null instance of ScalarValue for this datatype
fn try_from(datatype: DataType) -> Result<Self> {
(&datatype).try_into()
}
}

impl TryFrom<&DataType> for ScalarValue {
type Error = DataFusionError;

Expand All @@ -2260,6 +2269,9 @@ impl TryFrom<&DataType> for ScalarValue {
}
DataType::Utf8 => ScalarValue::Utf8(None),
DataType::LargeUtf8 => ScalarValue::LargeUtf8(None),
DataType::Binary => ScalarValue::Binary(None),
DataType::FixedSizeBinary(len) => ScalarValue::FixedSizeBinary(*len, None),
DataType::LargeBinary => ScalarValue::LargeBinary(None),
DataType::Date32 => ScalarValue::Date32(None),
DataType::Date64 => ScalarValue::Date64(None),
DataType::Time64(TimeUnit::Nanosecond) => ScalarValue::Time64(None),
Expand All @@ -2275,6 +2287,15 @@ impl TryFrom<&DataType> for ScalarValue {
DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => {
ScalarValue::TimestampNanosecond(None, tz_opt.clone())
}
DataType::Interval(IntervalUnit::YearMonth) => {
ScalarValue::IntervalYearMonth(None)
}
DataType::Interval(IntervalUnit::DayTime) => {
ScalarValue::IntervalDayTime(None)
}
DataType::Interval(IntervalUnit::MonthDayNano) => {
ScalarValue::IntervalMonthDayNano(None)
}
DataType::Dictionary(index_type, value_type) => ScalarValue::Dictionary(
index_type.clone(),
Box::new(value_type.as_ref().try_into()?),
Expand Down
55 changes: 10 additions & 45 deletions datafusion/proto/proto/datafusion.proto
Original file line number Diff line number Diff line change
Expand Up @@ -771,9 +771,13 @@ message StructValue {
}

message ScalarValue{
oneof value {
// Null value of any type (type is encoded)
PrimitiveScalarType null_value = 19;
// was PrimitiveScalarType null_value = 19;
reserved 19;

oneof value {
// was PrimitiveScalarType null_value = 19;
// Null value of any type
ArrowType null_value = 33;

bool bool_value = 1;
string utf8_value = 2;
Expand All @@ -788,7 +792,7 @@ message ScalarValue{
uint64 uint64_value = 11;
float float32_value = 12;
double float64_value = 13;
//Literal Date32 value always has a unit of day
// Literal Date32 value always has a unit of day
int32 date_32_value = 14;
ScalarListValue list_value = 17;
//WAS: ScalarType null_list_value = 18;
Expand All @@ -813,48 +817,9 @@ message Decimal128{
int64 s = 3;
}

// Contains all valid DataFusion scalar types except for List.
//
// Note: the numeric values below are not contiguous (18 and 19 do not
// appear) and are not listed in strictly increasing order (28 precedes
// 25-27).
// NOTE(review): presumably the missing numbers were retired earlier —
// confirm before reusing them.
enum PrimitiveScalarType{

BOOL = 0; // arrow::Type::BOOL
UINT8 = 1; // arrow::Type::UINT8
INT8 = 2; // arrow::Type::INT8
UINT16 = 3; // represents arrow::Type fields in src/arrow/type.h
INT16 = 4;
UINT32 = 5;
INT32 = 6;
UINT64 = 7;
INT64 = 8;
FLOAT32 = 9;
FLOAT64 = 10;
UTF8 = 11;
LARGE_UTF8 = 12;
DATE32 = 13;
TIMESTAMP_MICROSECOND = 14;
TIMESTAMP_NANOSECOND = 15;
NULL = 16;
DECIMAL128 = 17;
DATE64 = 20;
TIMESTAMP_SECOND = 21;
TIMESTAMP_MILLISECOND = 22;
INTERVAL_YEARMONTH = 23;
INTERVAL_DAYTIME = 24;
// Declared out of numeric order, next to the other interval types.
INTERVAL_MONTHDAYNANO = 28;

BINARY = 25;
LARGE_BINARY = 26;

TIME64 = 27;
}


// Broken out into multiple message types so that type
// metadata does not need to be in a separate message.
// Types that use the empty message type carry no additional metadata
// about the type.
// Serialized data type
message ArrowType{
oneof arrow_type_enum{
oneof arrow_type_enum {
EmptyMessage NONE = 1; // arrow::Type::NA
EmptyMessage BOOL = 2; // arrow::Type::BOOL
EmptyMessage UINT8 = 3; // arrow::Type::UINT8
Expand Down
Loading

0 comments on commit f966d7c

Please sign in to comment.