Skip to content

Commit c1364fc

Browse files
fvaleye authored and Maxxen committed
performance(memory): improve memory allocation when the size is known
Optimize memory allocation by using Vec::with_capacity() to improve performance on hot paths and with large data volumes. The benefit is greatest for large data volumes (especially structs, maps, and arrays).
1 parent 6e49ed2 commit c1364fc

File tree

4 files changed

+44
-43
lines changed

4 files changed

+44
-43
lines changed

crates/duckdb/src/appender/arrow.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,15 @@ impl Appender<'_> {
2828
/// Will return `Err` if append column count not the same with the table schema
2929
#[inline]
3030
pub fn append_record_batch(&mut self, record_batch: RecordBatch) -> Result<()> {
31-
let logical_types: Vec<LogicalTypeHandle> = record_batch
32-
.schema()
33-
.fields()
34-
.iter()
35-
.map(|field| {
31+
let fields = record_batch.schema().fields();
32+
let capacity = fields.len();
33+
let mut logical_types = Vec::with_capacity(capacity);
34+
for field in fields.iter() {
35+
logical_types.push(
3636
to_duckdb_logical_type(field.data_type())
37-
.map_err(|_op| Error::ArrowTypeToDuckdbType(field.to_string(), field.data_type().clone()))
38-
})
39-
.collect::<Result<Vec<_>, _>>()?;
37+
.map_err(|_op| Error::ArrowTypeToDuckdbType(field.to_string(), field.data_type().clone()))?,
38+
);
39+
}
4040

4141
let vector_size = unsafe { duckdb_vector_size() } as usize;
4242
let num_rows = record_batch.num_rows();

crates/duckdb/src/core/data_chunk.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ impl DataChunkHandle {
3434
/// Create a new [DataChunkHandle] with the given [LogicalTypeHandle]s.
3535
pub fn new(logical_types: &[LogicalTypeHandle]) -> Self {
3636
let num_columns = logical_types.len();
37-
let mut c_types = logical_types.iter().map(|t| t.ptr).collect::<Vec<_>>();
37+
let mut c_types = Vec::with_capacity(num_columns);
38+
c_types.extend(logical_types.iter().map(|t| t.ptr));
3839
let ptr = unsafe { duckdb_create_data_chunk(c_types.as_mut_ptr(), num_columns as u64) };
3940
Self { ptr, owned: true }
4041
}

crates/duckdb/src/types/mod.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -135,12 +135,12 @@ impl From<&DataType> for Type {
135135
Self::Array(Box::new(Self::from(field.data_type())), (*size).try_into().unwrap())
136136
}
137137
// DataType::LargeList(_) => Self::LargeList,
138-
DataType::Struct(inner) => Self::Struct(
139-
inner
140-
.iter()
141-
.map(|f| (f.name().to_owned(), Self::from(f.data_type())))
142-
.collect(),
143-
),
138+
DataType::Struct(inner) => {
139+
let capacity = inner.len();
140+
let mut struct_vec = Vec::with_capacity(capacity);
141+
struct_vec.extend(inner.iter().map(|f| (f.name().to_owned(), Self::from(f.data_type()))));
142+
Self::Struct(struct_vec)
143+
}
144144
DataType::LargeList(inner) => Self::List(Box::new(Self::from(inner.data_type()))),
145145
DataType::Union(_, _) => Self::Union,
146146
DataType::Decimal128(..) => Self::Decimal,

crates/duckdb/src/types/value_ref.rs

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -256,40 +256,41 @@ impl From<ValueRef<'_>> for Value {
256256
Self::Enum(dict_values.value(dict_key).to_string())
257257
}
258258
ValueRef::Struct(items, idx) => {
259-
let value: Vec<(String, Self)> = items
260-
.columns()
261-
.iter()
262-
.zip(items.fields().iter().map(|f| f.name().to_owned()))
263-
.map(|(column, name)| -> (String, Self) {
264-
(name, Row::value_ref_internal(idx, 0, column).to_owned())
265-
})
266-
.collect();
259+
let capacity = items.columns().len();
260+
let mut value = Vec::with_capacity(capacity);
261+
value.extend(
262+
items
263+
.columns()
264+
.iter()
265+
.zip(items.fields().iter().map(|f| f.name().to_owned()))
266+
.map(|(column, name)| -> (String, Self) {
267+
(name, Row::value_ref_internal(idx, 0, column).to_owned())
268+
}),
269+
);
267270
Self::Struct(OrderedMap::from(value))
268271
}
269272
ValueRef::Map(arr, idx) => {
270273
let keys = arr.keys();
271274
let values = arr.values();
272275
let offsets = arr.offsets();
273276
let range = offsets[idx]..offsets[idx + 1];
274-
Self::Map(OrderedMap::from(
275-
range
276-
.map(|row| {
277-
let row = row.try_into().unwrap();
278-
let key = Row::value_ref_internal(row, idx, keys).to_owned();
279-
let value = Row::value_ref_internal(row, idx, values).to_owned();
280-
(key, value)
281-
})
282-
.collect::<Vec<_>>(),
283-
))
277+
let capacity = range.len();
278+
let mut map_vec = Vec::with_capacity(capacity);
279+
map_vec.extend(range.map(|row| {
280+
let row = row.try_into().unwrap();
281+
let key = Row::value_ref_internal(row, idx, keys).to_owned();
282+
let value = Row::value_ref_internal(row, idx, values).to_owned();
283+
(key, value)
284+
}));
285+
Self::Map(OrderedMap::from(map_vec))
284286
}
285287
ValueRef::Array(items, idx) => {
286288
let value_length = usize::try_from(items.value_length()).unwrap();
287289
let range = (idx * value_length)..((idx + 1) * value_length);
288-
Self::Array(
289-
range
290-
.map(|row| Row::value_ref_internal(row, idx, items.values()).to_owned())
291-
.collect(),
292-
)
290+
let capacity = value_length;
291+
let mut array_vec = Vec::with_capacity(capacity);
292+
array_vec.extend(range.map(|row| Row::value_ref_internal(row, idx, items.values()).to_owned()));
293+
Self::Array(array_vec)
293294
}
294295
ValueRef::Union(column, idx) => {
295296
let column = column.as_any().downcast_ref::<UnionArray>().unwrap();
@@ -304,11 +305,10 @@ impl From<ValueRef<'_>> for Value {
304305
}
305306

306307
fn from_list(start: usize, end: usize, idx: usize, values: &ArrayRef) -> Value {
307-
Value::List(
308-
(start..end)
309-
.map(|row| Row::value_ref_internal(row, idx, values).to_owned())
310-
.collect(),
311-
)
308+
let capacity = end - start;
309+
let mut list_vec = Vec::with_capacity(capacity);
310+
list_vec.extend((start..end).map(|row| Row::value_ref_internal(row, idx, values).to_owned()));
311+
Value::List(list_vec)
312312
}
313313

314314
impl<'a> From<&'a str> for ValueRef<'a> {

0 commit comments

Comments
 (0)