From 29e563a049ce1174ebabf8b56f42df652d0aa01f Mon Sep 17 00:00:00 2001
From: QP Hou <dave2008713@gmail.com>
Date: Mon, 21 Feb 2022 19:51:18 -0800
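Subject: [PATCH] add support for datatypes serde (#858)

Add an opt-in `serde_types` Cargo feature that turns on the `serde` and
`serde_derive` features. When it is enabled, `Serialize` and `Deserialize`
are derived for `Field`, `Schema`, `DataType`, `UnionMode`, `TimeUnit`,
`IntervalUnit`, `PhysicalType`, `IntegerType` and `PrimitiveType`.
Every added import and derive is gated on the feature, so builds that do
not enable `serde_types` are unchanged.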
---
 Cargo.toml                     | 1 +
 src/datatypes/field.rs         | 4 ++++
 src/datatypes/mod.rs           | 7 +++++++
 src/datatypes/physical_type.rs | 5 +++++
 src/datatypes/schema.rs        | 4 ++++
 src/types/mod.rs               | 4 ++++
 6 files changed, 25 insertions(+)

diff --git a/Cargo.toml b/Cargo.toml
index 90a15f8c2ad..07fe03ca132 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -213,6 +213,7 @@ compute = [
 ]
 benchmarks = ["rand"]
 simd = ["packed_simd"]
+serde_types = ["serde", "serde_derive"]
 
 [package.metadata.cargo-all-features]
 allowlist = ["compute", "compute_sort", "compute_hash", "compute_nullif"]
diff --git a/src/datatypes/field.rs b/src/datatypes/field.rs
index f9ef6b1f0cc..07d1b760211 100644
--- a/src/datatypes/field.rs
+++ b/src/datatypes/field.rs
@@ -1,5 +1,8 @@
 use super::{DataType, Metadata};
 
+#[cfg(feature = "serde_types")]
+use serde_derive::{Deserialize, Serialize};
+
 /// Represents Arrow's metadata of a "column".
 ///
 /// A [`Field`] is the closest representation of the traditional "column": a logical type
@@ -9,6 +12,7 @@ use super::{DataType, Metadata};
 /// Almost all IO in this crate uses [`Field`] to represent logical information about the data
 /// to be serialized.
 #[derive(Debug, Clone, Eq, PartialEq, Hash)]
+#[cfg_attr(feature = "serde_types", derive(Serialize, Deserialize))]
 pub struct Field {
     /// Its name
     pub name: String,
diff --git a/src/datatypes/mod.rs b/src/datatypes/mod.rs
index 8af8245d6c4..e7e2c9b98e8 100644
--- a/src/datatypes/mod.rs
+++ b/src/datatypes/mod.rs
@@ -12,6 +12,9 @@ pub use schema::Schema;
 use std::collections::BTreeMap;
 use std::sync::Arc;
 
+#[cfg(feature = "serde_types")]
+use serde_derive::{Deserialize, Serialize};
+
 /// typedef for [BTreeMap<String, String>] denoting [`Field`]'s and [`Schema`]'s metadata.
 pub type Metadata = BTreeMap<String, String>;
 /// typedef fpr [Option<(String, Option<String>)>] descr
@@ -26,6 +29,7 @@ pub(crate) type Extension = Option<(String, Option<String>)>;
 /// The [`DataType::Extension`] is special in that it augments a [`DataType`] with metadata to support custom types.
 /// Use `to_logical_type` to desugar such type and return its correspoding logical type.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde_types", derive(Serialize, Deserialize))]
 pub enum DataType {
     /// Null type
     Null,
@@ -156,6 +160,7 @@ pub enum DataType {
 
 /// Mode of [`DataType::Union`]
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde_types", derive(Serialize, Deserialize))]
 pub enum UnionMode {
     /// Dense union
     Dense,
@@ -187,6 +192,7 @@ impl UnionMode {
 
 /// The time units defined in Arrow.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde_types", derive(Serialize, Deserialize))]
 pub enum TimeUnit {
     /// Time in seconds.
     Second,
@@ -200,6 +206,7 @@ pub enum TimeUnit {
 
 /// Interval units defined in Arrow
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde_types", derive(Serialize, Deserialize))]
 pub enum IntervalUnit {
     /// The number of elapsed whole months.
     YearMonth,
diff --git a/src/datatypes/physical_type.rs b/src/datatypes/physical_type.rs
index 7e15cb19629..828df9541f0 100644
--- a/src/datatypes/physical_type.rs
+++ b/src/datatypes/physical_type.rs
@@ -1,9 +1,13 @@
 pub use crate::types::PrimitiveType;
 
+#[cfg(feature = "serde_types")]
+use serde_derive::{Deserialize, Serialize};
+
 /// The set of physical types: unique in-memory representations of an Arrow array.
 /// A physical type has a one-to-many relationship with a [`crate::datatypes::DataType`] and
 /// a one-to-one mapping to each struct in this crate that implements [`crate::array::Array`].
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde_types", derive(Serialize, Deserialize))]
 pub enum PhysicalType {
     /// A Null with no allocation.
     Null,
@@ -51,6 +55,7 @@ impl PhysicalType {
 /// the set of valid indices types of a dictionary-encoded Array.
 /// Each type corresponds to a variant of [`crate::array::DictionaryArray`].
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde_types", derive(Serialize, Deserialize))]
 pub enum IntegerType {
     /// A signed 8-bit integer.
     Int8,
diff --git a/src/datatypes/schema.rs b/src/datatypes/schema.rs
index 38dcb5ccd41..baa04476360 100644
--- a/src/datatypes/schema.rs
+++ b/src/datatypes/schema.rs
@@ -1,11 +1,15 @@
 use super::{Field, Metadata};
 
+#[cfg(feature = "serde_types")]
+use serde_derive::{Deserialize, Serialize};
+
 /// An ordered sequence of [`Field`]s with associated [`Metadata`].
 ///
 /// [`Schema`] is an abstration used to read from, and write to, Arrow IPC format,
 /// Apache Parquet, and Apache Avro. All these formats have a concept of a schema
 /// with fields and metadata.
 #[derive(Debug, Clone, PartialEq, Eq, Default)]
+#[cfg_attr(feature = "serde_types", derive(Serialize, Deserialize))]
 pub struct Schema {
     /// The fields composing this schema.
     pub fields: Vec<Field>,
diff --git a/src/types/mod.rs b/src/types/mod.rs
index 4ba1584acf9..4b794c55a1f 100644
--- a/src/types/mod.rs
+++ b/src/types/mod.rs
@@ -30,8 +30,12 @@ pub use native::*;
 mod offset;
 pub use offset::*;
 
+#[cfg(feature = "serde_types")]
+use serde_derive::{Deserialize, Serialize};
+
 /// The set of all implementations of the sealed trait [`NativeType`].
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde_types", derive(Serialize, Deserialize))]
 pub enum PrimitiveType {
     /// A signed 8-bit integer.
     Int8,