20
20
use crate :: VariantArray ;
21
21
use arrow:: array:: { ArrayRef , BinaryViewArray , BinaryViewBuilder , NullBufferBuilder , StructArray } ;
22
22
use arrow_schema:: { ArrowError , DataType , Field , Fields } ;
23
- use parquet_variant:: { ListBuilder , ObjectBuilder , Variant , VariantBuilder , VariantBuilderExt } ;
23
+ use parquet_variant:: { ListBuilder , ObjectBuilder , Variant , VariantBuilderExt } ;
24
+ use parquet_variant:: { MetadataBuilder , ParentState , ValueBuilder } ;
24
25
use std:: sync:: Arc ;
25
26
26
27
/// A builder for [`VariantArray`]
@@ -72,12 +73,12 @@ use std::sync::Arc;
72
73
pub struct VariantArrayBuilder {
73
74
/// Nulls
74
75
nulls : NullBufferBuilder ,
75
- /// buffer for all the metadata
76
- metadata_buffer : Vec < u8 > ,
76
+ /// builder for all the metadata
77
+ metadata_builder : MetadataBuilder ,
77
78
/// ending offset for each serialized metadata dictionary in the buffer
78
79
metadata_offsets : Vec < usize > ,
79
- /// buffer for values
80
- value_buffer : Vec < u8 > ,
80
+ /// builder for values
81
+ value_builder : ValueBuilder ,
81
82
/// ending offset for each serialized variant value in the buffer
82
83
value_offsets : Vec < usize > ,
83
84
/// The fields of the final `StructArray`
@@ -95,9 +96,9 @@ impl VariantArrayBuilder {
95
96
96
97
Self {
97
98
nulls : NullBufferBuilder :: new ( row_capacity) ,
98
- metadata_buffer : Vec :: new ( ) , // todo allocation capacity
99
+ metadata_builder : MetadataBuilder :: default ( ) ,
99
100
metadata_offsets : Vec :: with_capacity ( row_capacity) ,
100
- value_buffer : Vec :: new ( ) ,
101
+ value_builder : ValueBuilder :: new ( ) ,
101
102
value_offsets : Vec :: with_capacity ( row_capacity) ,
102
103
fields : Fields :: from ( vec ! [ metadata_field, value_field] ) ,
103
104
}
@@ -107,15 +108,17 @@ impl VariantArrayBuilder {
107
108
pub fn build ( self ) -> VariantArray {
108
109
let Self {
109
110
mut nulls,
110
- metadata_buffer ,
111
+ metadata_builder ,
111
112
metadata_offsets,
112
- value_buffer ,
113
+ value_builder ,
113
114
value_offsets,
114
115
fields,
115
116
} = self ;
116
117
118
+ let metadata_buffer = metadata_builder. into_inner ( ) ;
117
119
let metadata_array = binary_view_array_from_buffers ( metadata_buffer, metadata_offsets) ;
118
120
121
+ let value_buffer = value_builder. into_inner ( ) ;
119
122
let value_array = binary_view_array_from_buffers ( value_buffer, value_offsets) ;
120
123
121
124
// Then build the final struct array
@@ -136,14 +139,14 @@ impl VariantArrayBuilder {
136
139
pub fn append_null ( & mut self ) {
137
140
self . nulls . append_null ( ) ;
138
141
// The subfields are expected to be non-nullable according to the parquet variant spec.
139
- self . metadata_offsets . push ( self . metadata_buffer . len ( ) ) ;
140
- self . value_offsets . push ( self . value_buffer . len ( ) ) ;
142
+ self . metadata_offsets . push ( self . metadata_builder . offset ( ) ) ;
143
+ self . value_offsets . push ( self . value_builder . offset ( ) ) ;
141
144
}
142
145
143
146
/// Append the [`Variant`] to the builder as the next row
144
147
pub fn append_variant ( & mut self , variant : Variant ) {
145
148
let mut direct_builder = self . variant_builder ( ) ;
146
- direct_builder. variant_builder . append_value ( variant) ;
149
+ direct_builder. append_value ( variant) ;
147
150
direct_builder. finish ( )
148
151
}
149
152
@@ -194,32 +197,23 @@ impl VariantArrayBuilder {
194
197
///
195
198
/// See [`VariantArrayBuilder::variant_builder`] for an example
196
199
pub struct VariantArrayVariantBuilder < ' a > {
197
- /// was finish called?
198
- finished : bool ,
199
- /// starting offset in the variant_builder's `metadata` buffer
200
- metadata_offset : usize ,
201
- /// starting offset in the variant_builder's `value` buffer
202
- value_offset : usize ,
203
- /// Parent array builder that this variant builder writes to. Buffers
204
- /// have been moved into the variant builder, and must be returned on
205
- /// drop
206
- array_builder : & ' a mut VariantArrayBuilder ,
207
- /// Builder for the in progress variant value, temporarily owns the buffers
208
- /// from `array_builder`
209
- variant_builder : VariantBuilder ,
200
+ parent_state : ParentState < ' a > ,
201
+ metadata_offsets : & ' a mut Vec < usize > ,
202
+ value_offsets : & ' a mut Vec < usize > ,
203
+ nulls : & ' a mut NullBufferBuilder ,
210
204
}
211
205
212
206
impl VariantBuilderExt for VariantArrayVariantBuilder < ' _ > {
213
207
fn append_value < ' m , ' v > ( & mut self , value : impl Into < Variant < ' m , ' v > > ) {
214
- self . variant_builder . append_value ( value) ;
208
+ ValueBuilder :: append_variant ( self . parent_state ( ) , value. into ( ) ) ;
215
209
}
216
210
217
211
fn try_new_list ( & mut self ) -> Result < ListBuilder < ' _ > , ArrowError > {
218
- Ok ( self . variant_builder . new_list ( ) )
212
+ Ok ( ListBuilder :: new ( self . parent_state ( ) , false ) )
219
213
}
220
214
221
215
fn try_new_object ( & mut self ) -> Result < ObjectBuilder < ' _ > , ArrowError > {
222
- Ok ( self . variant_builder . new_object ( ) )
216
+ Ok ( ObjectBuilder :: new ( self . parent_state ( ) , false ) )
223
217
}
224
218
}
225
219
@@ -228,103 +222,40 @@ impl<'a> VariantArrayVariantBuilder<'a> {
228
222
///
229
223
/// Note this is not public as this is a structure that is logically
230
224
/// part of the [`VariantArrayBuilder`] and relies on its internal structure
231
- fn new ( array_builder : & ' a mut VariantArrayBuilder ) -> Self {
232
- // append directly into the metadata and value buffers
233
- let metadata_buffer = std:: mem:: take ( & mut array_builder. metadata_buffer ) ;
234
- let value_buffer = std:: mem:: take ( & mut array_builder. value_buffer ) ;
235
- let metadata_offset = metadata_buffer. len ( ) ;
236
- let value_offset = value_buffer. len ( ) ;
225
+ fn new ( builder : & ' a mut VariantArrayBuilder ) -> Self {
226
+ let parent_state =
227
+ ParentState :: variant ( & mut builder. value_builder , & mut builder. metadata_builder ) ;
237
228
VariantArrayVariantBuilder {
238
- finished : false ,
239
- metadata_offset,
240
- value_offset,
241
- variant_builder : VariantBuilder :: new_with_buffers ( metadata_buffer, value_buffer) ,
242
- array_builder,
229
+ parent_state,
230
+ metadata_offsets : & mut builder. metadata_offsets ,
231
+ value_offsets : & mut builder. value_offsets ,
232
+ nulls : & mut builder. nulls ,
243
233
}
244
234
}
245
235
246
- /// Return a reference to the underlying `VariantBuilder`
247
- pub fn inner ( & self ) -> & VariantBuilder {
248
- & self . variant_builder
249
- }
250
-
251
- /// Return a mutable reference to the underlying `VariantBuilder`
252
- pub fn inner_mut ( & mut self ) -> & mut VariantBuilder {
253
- & mut self . variant_builder
254
- }
255
-
256
236
/// Called to finish the in progress variant and write it to the underlying
257
237
/// buffers
258
238
///
259
239
/// Note if you do not call finish, on drop any changes made to the
260
240
/// underlying buffers will be rolled back.
261
241
pub fn finish ( mut self ) {
262
- self . finished = true ;
263
-
264
- let metadata_offset = self . metadata_offset ;
265
- let value_offset = self . value_offset ;
266
- // get the buffers back from the variant builder
267
- let ( metadata_buffer, value_buffer) = std:: mem:: take ( & mut self . variant_builder ) . finish ( ) ;
268
-
269
- // Sanity Check: if the buffers got smaller, something went wrong (previous data was lost)
270
- assert ! (
271
- metadata_offset <= metadata_buffer. len( ) ,
272
- "metadata length decreased unexpectedly"
273
- ) ;
274
- assert ! (
275
- value_offset <= value_buffer. len( ) ,
276
- "value length decreased unexpectedly"
277
- ) ;
278
-
279
- // commit the changes by putting the
280
- // ending offsets into the parent array builder.
281
- let builder = & mut self . array_builder ;
282
- builder. metadata_offsets . push ( metadata_buffer. len ( ) ) ;
283
- builder. value_offsets . push ( value_buffer. len ( ) ) ;
284
- builder. nulls . append_non_null ( ) ;
242
+ // Record the ending offsets after finishing metadata and finish the parent state.
243
+ let ( value_builder, metadata_builder) = self . parent_state . value_and_metadata_builders ( ) ;
244
+ self . metadata_offsets . push ( metadata_builder. finish ( ) ) ;
245
+ self . value_offsets . push ( value_builder. offset ( ) ) ;
246
+ self . nulls . append_non_null ( ) ;
247
+ self . parent_state . finish ( ) ;
248
+ }
285
249
286
- // put the buffers back into the array builder
287
- builder . metadata_buffer = metadata_buffer ;
288
- builder . value_buffer = value_buffer ;
250
+ fn parent_state ( & mut self ) -> ParentState < ' _ > {
251
+ let ( value_builder , metadata_builder ) = self . parent_state . value_and_metadata_builders ( ) ;
252
+ ParentState :: variant ( value_builder , metadata_builder )
289
253
}
290
254
}
291
255
256
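+ // Empty Drop impl to help with borrow checking: it keeps this builder's mutable borrows alive until it is dropped, so the compiler flags use of the parent builder when users forget to call finish()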
292
257
impl Drop for VariantArrayVariantBuilder < ' _ > {
293
- /// If the builder was not finished, roll back any changes made to the
294
- /// underlying buffers (by truncating them)
295
- fn drop ( & mut self ) {
296
- if self . finished {
297
- return ;
298
- }
299
-
300
- // if the object was not finished, need to rollback any changes by
301
- // truncating the buffers to the original offsets
302
- let metadata_offset = self . metadata_offset ;
303
- let value_offset = self . value_offset ;
304
-
305
- // get the buffers back from the variant builder
306
- let ( mut metadata_buffer, mut value_buffer) =
307
- std:: mem:: take ( & mut self . variant_builder ) . into_buffers ( ) ;
308
-
309
- // Sanity Check: if the buffers got smaller, something went wrong (previous data was lost) so panic immediately
310
- metadata_buffer
311
- . len ( )
312
- . checked_sub ( metadata_offset)
313
- . expect ( "metadata length decreased unexpectedly" ) ;
314
- value_buffer
315
- . len ( )
316
- . checked_sub ( value_offset)
317
- . expect ( "value length decreased unexpectedly" ) ;
318
-
319
- // Note this truncate is fast because truncate doesn't free any memory:
320
- // it just has to drop elements (and u8 doesn't have a destructor)
321
- metadata_buffer. truncate ( metadata_offset) ;
322
- value_buffer. truncate ( value_offset) ;
323
-
324
- // put the buffers back into the array builder
325
- self . array_builder . metadata_buffer = metadata_buffer;
326
- self . array_builder . value_buffer = value_buffer;
327
- }
258
+ fn drop ( & mut self ) { }
328
259
}
329
260
330
261
fn binary_view_array_from_buffers ( buffer : Vec < u8 > , offsets : Vec < usize > ) -> BinaryViewArray {
@@ -457,12 +388,18 @@ mod test {
457
388
assert_eq ! ( variant_array. len( ) , 2 ) ;
458
389
assert ! ( !variant_array. is_null( 0 ) ) ;
459
390
let variant = variant_array. value ( 0 ) ;
460
- let variant = variant. as_object ( ) . expect ( "variant to be an object" ) ;
461
- assert_eq ! ( variant. get( "foo" ) . unwrap( ) , Variant :: from( 1i32 ) ) ;
391
+ assert_eq ! (
392
+ variant. get_object_field( "foo" ) ,
393
+ Some ( Variant :: from( 1i32 ) ) ,
394
+ "Expected an object with field \" foo\" , got: {variant:?}"
395
+ ) ;
462
396
463
397
assert ! ( !variant_array. is_null( 1 ) ) ;
464
398
let variant = variant_array. value ( 1 ) ;
465
- let variant = variant. as_object ( ) . expect ( "variant to be an object" ) ;
466
- assert_eq ! ( variant. get( "baz" ) . unwrap( ) , Variant :: from( 3i32 ) ) ;
399
+ assert_eq ! (
400
+ variant. get_object_field( "baz" ) ,
401
+ Some ( Variant :: from( 3i32 ) ) ,
402
+ "Expected an object with field \" baz\" , got: {variant:?}"
403
+ ) ;
467
404
}
468
405
}
0 commit comments