Skip to content

Commit 7af4953

Browse files
committed
move FieldMetadata
1 parent 08b1edf commit 7af4953

File tree

12 files changed

+270
-247
lines changed

12 files changed

+270
-247
lines changed

datafusion/common/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ pub mod file_options;
4747
pub mod format;
4848
pub mod hash_utils;
4949
pub mod instant;
50+
pub mod metadata;
5051
pub mod nested_struct;
5152
mod null_equality;
5253
pub mod parsers;

datafusion/common/src/metadata.rs

Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use std::{collections::BTreeMap, sync::Arc};
19+
20+
use arrow::datatypes::Field;
21+
use hashbrown::HashMap;
22+
23+
/// Literal metadata
24+
///
25+
/// Stores metadata associated with a literal expression
26+
/// and is designed to be fast to `clone`.
27+
///
28+
/// This structure is used to store metadata associated with a literal expression, and it
29+
/// corresponds to the `metadata` field on [`Field`].
30+
///
31+
/// # Example: Create [`FieldMetadata`] from a [`Field`]
32+
/// ```
33+
/// # use std::collections::HashMap;
34+
/// # use datafusion_common::metadata::FieldMetadata;
35+
/// # use arrow::datatypes::{Field, DataType};
36+
/// # let field = Field::new("c1", DataType::Int32, true)
37+
/// # .with_metadata(HashMap::from([("foo".to_string(), "bar".to_string())]));
38+
/// // Create a new `FieldMetadata` instance from a `Field`
39+
/// let metadata = FieldMetadata::new_from_field(&field);
40+
/// // There is also a `From` impl:
41+
/// let metadata = FieldMetadata::from(&field);
42+
/// ```
43+
///
44+
/// # Example: Update a [`Field`] with [`FieldMetadata`]
45+
/// ```
46+
/// # use datafusion_common::metadata::FieldMetadata;
47+
/// # use arrow::datatypes::{Field, DataType};
48+
/// # let field = Field::new("c1", DataType::Int32, true);
49+
/// # let metadata = FieldMetadata::new_from_field(&field);
50+
/// // Add any metadata from `FieldMetadata` to `Field`
51+
/// let updated_field = metadata.add_to_field(field);
52+
/// ```
53+
///
54+
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
55+
pub struct FieldMetadata {
56+
/// The inner metadata of a literal expression, which is a map of string
57+
/// keys to string values.
58+
///
59+
/// Note this is not a `HashMap` because `HashMap` does not provide
60+
/// implementations for traits like `Hash` and `PartialOrd`.
61+
inner: Arc<BTreeMap<String, String>>,
62+
}
63+
64+
impl Default for FieldMetadata {
65+
fn default() -> Self {
66+
Self::new_empty()
67+
}
68+
}
69+
70+
impl FieldMetadata {
71+
/// Create a new empty metadata instance.
72+
pub fn new_empty() -> Self {
73+
Self {
74+
inner: Arc::new(BTreeMap::new()),
75+
}
76+
}
77+
78+
/// Merges two optional `FieldMetadata` instances, overwriting any existing
79+
/// keys in `m` with keys from `n` if present.
80+
///
81+
/// This function is commonly used in alias operations, particularly for literals
82+
/// with metadata. When creating an alias expression, the metadata from the original
83+
/// expression (such as a literal) is combined with any metadata specified on the alias.
84+
///
85+
/// # Arguments
86+
///
87+
/// * `m` - The first metadata (typically from the original expression like a literal)
88+
/// * `n` - The second metadata (typically from the alias definition)
89+
///
90+
/// # Merge Strategy
91+
///
92+
/// - If both metadata instances exist, they are merged with `n` taking precedence
93+
/// - Keys from `n` will overwrite keys from `m` if they have the same name
94+
/// - If only one metadata instance exists, it is returned unchanged
95+
/// - If neither exists, `None` is returned
96+
///
97+
/// # Example usage
98+
/// ```rust
99+
/// use datafusion_common::metadata::FieldMetadata;
100+
/// use std::collections::BTreeMap;
101+
///
102+
/// // Create metadata for a literal expression
103+
/// let literal_metadata = Some(FieldMetadata::from(BTreeMap::from([
104+
/// ("source".to_string(), "constant".to_string()),
105+
/// ("type".to_string(), "int".to_string()),
106+
/// ])));
107+
///
108+
/// // Create metadata for an alias
109+
/// let alias_metadata = Some(FieldMetadata::from(BTreeMap::from([
110+
/// ("description".to_string(), "answer".to_string()),
111+
/// ("source".to_string(), "user".to_string()), // This will override literal's "source"
112+
/// ])));
113+
///
114+
/// // Merge the metadata
115+
/// let merged = FieldMetadata::merge_options(
116+
/// literal_metadata.as_ref(),
117+
/// alias_metadata.as_ref(),
118+
/// );
119+
///
120+
/// // Result contains: {"source": "user", "type": "int", "description": "answer"}
121+
/// assert!(merged.is_some());
122+
/// ```
123+
pub fn merge_options(
124+
m: Option<&FieldMetadata>,
125+
n: Option<&FieldMetadata>,
126+
) -> Option<FieldMetadata> {
127+
match (m, n) {
128+
(Some(m), Some(n)) => {
129+
let mut merged = m.clone();
130+
merged.extend(n.clone());
131+
Some(merged)
132+
}
133+
(Some(m), None) => Some(m.clone()),
134+
(None, Some(n)) => Some(n.clone()),
135+
(None, None) => None,
136+
}
137+
}
138+
139+
/// Create a new metadata instance from a `Field`'s metadata.
140+
pub fn new_from_field(field: &Field) -> Self {
141+
let inner = field
142+
.metadata()
143+
.iter()
144+
.map(|(k, v)| (k.to_string(), v.to_string()))
145+
.collect();
146+
Self {
147+
inner: Arc::new(inner),
148+
}
149+
}
150+
151+
/// Create a new metadata instance from a map of string keys to string values.
152+
pub fn new(inner: BTreeMap<String, String>) -> Self {
153+
Self {
154+
inner: Arc::new(inner),
155+
}
156+
}
157+
158+
/// Get the inner metadata as a reference to a `BTreeMap`.
159+
pub fn inner(&self) -> &BTreeMap<String, String> {
160+
&self.inner
161+
}
162+
163+
/// Consumes this `FieldMetadata` and returns the inner `Arc<BTreeMap<String, String>>`.
164+
pub fn into_inner(self) -> Arc<BTreeMap<String, String>> {
165+
self.inner
166+
}
167+
168+
/// Adds metadata from `other` into `self`, overwriting any existing keys.
169+
pub fn extend(&mut self, other: Self) {
170+
if other.is_empty() {
171+
return;
172+
}
173+
let other = Arc::unwrap_or_clone(other.into_inner());
174+
Arc::make_mut(&mut self.inner).extend(other);
175+
}
176+
177+
/// Returns true if the metadata is empty.
178+
pub fn is_empty(&self) -> bool {
179+
self.inner.is_empty()
180+
}
181+
182+
/// Returns the number of key-value pairs in the metadata.
183+
pub fn len(&self) -> usize {
184+
self.inner.len()
185+
}
186+
187+
/// Convert this `FieldMetadata` into a `HashMap<String, String>`
188+
pub fn to_hashmap(&self) -> std::collections::HashMap<String, String> {
189+
self.inner
190+
.iter()
191+
.map(|(k, v)| (k.to_string(), v.to_string()))
192+
.collect()
193+
}
194+
195+
/// Updates the metadata on the Field with this metadata, if it is not empty.
196+
pub fn add_to_field(&self, field: Field) -> Field {
197+
if self.inner.is_empty() {
198+
return field;
199+
}
200+
201+
field.with_metadata(self.to_hashmap())
202+
}
203+
}
204+
205+
impl From<&Field> for FieldMetadata {
206+
fn from(field: &Field) -> Self {
207+
Self::new_from_field(field)
208+
}
209+
}
210+
211+
impl From<BTreeMap<String, String>> for FieldMetadata {
212+
fn from(inner: BTreeMap<String, String>) -> Self {
213+
Self::new(inner)
214+
}
215+
}
216+
217+
impl From<std::collections::HashMap<String, String>> for FieldMetadata {
218+
fn from(map: std::collections::HashMap<String, String>) -> Self {
219+
Self::new(map.into_iter().collect())
220+
}
221+
}
222+
223+
/// Builds a `FieldMetadata` by copying the entries of a borrowed `std::collections::HashMap`.
224+
impl From<&std::collections::HashMap<String, String>> for FieldMetadata {
225+
fn from(map: &std::collections::HashMap<String, String>) -> Self {
226+
let inner = map
227+
.iter()
228+
.map(|(k, v)| (k.to_string(), v.to_string()))
229+
.collect();
230+
Self::new(inner)
231+
}
232+
}
233+
234+
/// Builds a `FieldMetadata` from an owned `hashbrown::HashMap`.
235+
impl From<HashMap<String, String>> for FieldMetadata {
236+
fn from(map: HashMap<String, String>) -> Self {
237+
let inner = map.into_iter().collect();
238+
Self::new(inner)
239+
}
240+
}
241+
242+
impl From<&HashMap<String, String>> for FieldMetadata {
243+
fn from(map: &HashMap<String, String>) -> Self {
244+
let inner = map
245+
.into_iter()
246+
.map(|(k, v)| (k.to_string(), v.to_string()))
247+
.collect();
248+
Self::new(inner)
249+
}
250+
}

datafusion/core/tests/dataframe/mod.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ use arrow::error::ArrowError;
3333
use arrow::util::pretty::pretty_format_batches;
3434
use arrow_schema::{SortOptions, TimeUnit};
3535
use datafusion::{assert_batches_eq, dataframe};
36+
use datafusion_common::metadata::FieldMetadata;
3637
use datafusion_functions_aggregate::count::{count_all, count_all_window};
3738
use datafusion_functions_aggregate::expr_fn::{
3839
array_agg, avg, avg_distinct, count, count_distinct, max, median, min, sum,
@@ -71,9 +72,7 @@ use datafusion_common_runtime::SpawnedTask;
7172
use datafusion_datasource::file_format::format_as_file_type;
7273
use datafusion_execution::config::SessionConfig;
7374
use datafusion_execution::runtime_env::RuntimeEnv;
74-
use datafusion_expr::expr::{
75-
FieldMetadata, GroupingSet, NullTreatment, Sort, WindowFunction,
76-
};
75+
use datafusion_expr::expr::{GroupingSet, NullTreatment, Sort, WindowFunction};
7776
use datafusion_expr::var_provider::{VarProvider, VarType};
7877
use datafusion_expr::{
7978
cast, col, create_udf, exists, in_subquery, lit, out_ref_col, placeholder,

datafusion/core/tests/user_defined/user_defined_scalar_functions.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,13 @@ use datafusion::execution::context::{FunctionFactory, RegisterFunction, SessionS
3434
use datafusion::prelude::*;
3535
use datafusion::{execution::registry::FunctionRegistry, test_util};
3636
use datafusion_common::cast::{as_float64_array, as_int32_array};
37+
use datafusion_common::metadata::FieldMetadata;
3738
use datafusion_common::tree_node::{Transformed, TreeNode};
3839
use datafusion_common::utils::take_function_args;
3940
use datafusion_common::{
4041
assert_batches_eq, assert_batches_sorted_eq, assert_contains, exec_datafusion_err,
4142
exec_err, not_impl_err, plan_err, DFSchema, DataFusionError, Result, ScalarValue,
4243
};
43-
use datafusion_expr::expr::FieldMetadata;
4444
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
4545
use datafusion_expr::{
4646
lit_with_metadata, Accumulator, ColumnarValue, CreateFunction, CreateFunctionBody,

0 commit comments

Comments
 (0)