Skip to content

Commit fc3a964

Browse files
committed
feat: expose data file serialized (#26)
* expose data file serialized * fix
1 parent 32e2f7e commit fc3a964

File tree

3 files changed

+28
-19
lines changed

3 files changed

+28
-19
lines changed

crates/iceberg/src/spec/manifest/_serde.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,8 @@ impl ManifestEntryV1 {
9898

9999
#[serde_as]
100100
#[derive(Serialize, Deserialize)]
101-
pub(super) struct DataFileSerde {
101+
/// Serde representation of a data file, convertible to and from `super::DataFile`.
102+
pub struct DataFileSerde {
102103
#[serde(default)]
103104
content: i32,
104105
file_path: String,
@@ -126,6 +127,7 @@ pub(super) struct DataFileSerde {
126127
}
127128

128129
impl DataFileSerde {
130+
/// Try to convert a `super::DataFile` to a `DataFileSerde`.
129131
pub fn try_from(
130132
value: super::DataFile,
131133
partition_type: &StructType,
@@ -164,6 +166,7 @@ impl DataFileSerde {
164166
})
165167
}
166168

169+
/// Try to convert a `DataFileSerde` to a `super::DataFile`.
167170
pub fn try_into(
168171
self,
169172
partition_spec_id: i32,
@@ -236,7 +239,7 @@ impl DataFileSerde {
236239
}
237240

238241
#[serde_as]
239-
#[derive(Serialize, Deserialize)]
242+
#[derive(Serialize, Deserialize, Clone)]
240243
#[cfg_attr(test, derive(Debug, PartialEq, Eq))]
241244
struct BytesEntry {
242245
key: i32,
@@ -277,7 +280,7 @@ fn to_bytes_entry(v: impl IntoIterator<Item = (i32, Datum)>) -> Result<Vec<Bytes
277280
Ok(bs)
278281
}
279282

280-
#[derive(Serialize, Deserialize)]
283+
#[derive(Serialize, Deserialize, Clone)]
281284
#[cfg_attr(test, derive(Debug, PartialEq, Eq))]
282285
struct I64Entry {
283286
key: i32,

crates/iceberg/src/spec/manifest/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
// under the License.
1717

1818
mod _serde;
19+
/// Serializable form of a data file (public re-export of `_serde::DataFileSerde`).
20+
pub use _serde::DataFileSerde as SerializedDataFile;
1921

2022
mod data_file;
2123
pub use data_file::*;

crates/iceberg/src/spec/values.rs

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2242,6 +2242,8 @@ mod timestamptz {
22422242
}
22432243

22442244
mod _serde {
2245+
use std::collections::HashMap;
2246+
22452247
use serde::de::Visitor;
22462248
use serde::ser::{SerializeMap, SerializeSeq, SerializeStruct};
22472249
use serde::{Deserialize, Serialize};
@@ -2252,7 +2254,7 @@ mod _serde {
22522254
use crate::spec::{MAP_KEY_FIELD_NAME, MAP_VALUE_FIELD_NAME, PrimitiveType, Type};
22532255
use crate::{Error, ErrorKind};
22542256

2255-
#[derive(SerializeDerive, DeserializeDerive, Debug)]
2257+
#[derive(SerializeDerive, DeserializeDerive, Debug, Clone)]
22562258
#[serde(transparent)]
22572259
/// Raw literal representation used for serde. The serialization path is used by the Avro serializer.
22582260
pub struct RawLiteral(RawLiteralEnum);
@@ -2840,22 +2842,24 @@ mod _serde {
28402842
optional: _,
28412843
}) => match ty {
28422844
Type::Struct(struct_ty) => {
2843-
let iters: Vec<Option<Literal>> = required
2844-
.into_iter()
2845-
.map(|(field_name, value)| {
2846-
let field = struct_ty
2847-
.field_by_name(field_name.as_str())
2848-
.ok_or_else(|| {
2849-
invalid_err_with_reason(
2850-
"record",
2851-
&format!("field {} is not exist", &field_name),
2852-
)
2853-
})?;
2854-
let value = value.try_into(&field.field_type)?;
2855-
Ok(value)
2845+
let mut value_map: HashMap<String, RawLiteralEnum> =
2846+
required.into_iter().collect();
2847+
let values = struct_ty
2848+
.fields()
2849+
.iter()
2850+
.map(|f| {
2851+
if let Some(raw_value) = value_map.remove(&f.name) {
2852+
let value = raw_value.try_into(&f.field_type)?;
2853+
Ok(value)
2854+
} else {
2855+
Err(invalid_err_with_reason(
2856+
"record",
2857+
&format!("field {} is not exist", &f.name),
2858+
))
2859+
}
28562860
})
2857-
.collect::<Result<_, Error>>()?;
2858-
Ok(Some(Literal::Struct(super::Struct::from_iter(iters))))
2861+
.collect::<Result<Vec<_>, Error>>()?;
2862+
Ok(Some(Literal::Struct(super::Struct::from_iter(values))))
28592863
}
28602864
Type::Map(map_ty) => {
28612865
if *map_ty.key_field.field_type != Type::Primitive(PrimitiveType::String) {

0 commit comments

Comments
 (0)