Skip to content

Commit 30eaff3

Browse files
authored
bugfix: select_columns should validate column names (#18623)
## Which issue does this PR close? - Closes #18622 ## Rationale for this change If you call `select_columns` or `drop_columns` and you pass an invalid column name, you should get an error. In 50.3 the unit tests I have updated will pass but in the current branch for 51 release candidate they fail. ## What changes are included in this PR? Adds validation of column names. Updates unit tests. ## Are these changes tested? Yes, included. Also tested with datafusion-python.
1 parent 65f369d commit 30eaff3

File tree

2 files changed

+36
-6
lines changed

2 files changed

+36
-6
lines changed

datafusion/core/src/dataframe/mod.rs

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,8 @@ use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
5252
use datafusion_common::config::{CsvOptions, JsonOptions};
5353
use datafusion_common::{
5454
exec_err, internal_datafusion_err, not_impl_err, plan_datafusion_err, plan_err,
55-
Column, DFSchema, DataFusionError, ParamValues, ScalarValue, SchemaError,
56-
TableReference, UnnestOptions,
55+
unqualified_field_not_found, Column, DFSchema, DataFusionError, ParamValues,
56+
ScalarValue, SchemaError, TableReference, UnnestOptions,
5757
};
5858
use datafusion_expr::select_expr::SelectExpr;
5959
use datafusion_expr::{
@@ -310,11 +310,20 @@ impl DataFrame {
310310
pub fn select_columns(self, columns: &[&str]) -> Result<DataFrame> {
311311
let fields = columns
312312
.iter()
313-
.flat_map(|name| {
314-
self.plan
313+
.map(|name| {
314+
let fields = self
315+
.plan
315316
.schema()
316-
.qualified_fields_with_unqualified_name(name)
317+
.qualified_fields_with_unqualified_name(name);
318+
if fields.is_empty() {
319+
Err(unqualified_field_not_found(name, self.plan.schema()))
320+
} else {
321+
Ok(fields)
322+
}
317323
})
324+
.collect::<Result<Vec<_>, _>>()?
325+
.into_iter()
326+
.flatten()
318327
.collect::<Vec<_>>();
319328
let expr: Vec<Expr> = fields
320329
.into_iter()

datafusion/core/tests/dataframe/mod.rs

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ use datafusion_catalog::TableProvider;
6767
use datafusion_common::test_util::{batches_to_sort_string, batches_to_string};
6868
use datafusion_common::{
6969
assert_contains, internal_datafusion_err, Constraint, Constraints, DFSchema,
70-
DataFusionError, ScalarValue, TableReference, UnnestOptions,
70+
DataFusionError, ScalarValue, SchemaError, TableReference, UnnestOptions,
7171
};
7272
use datafusion_common_runtime::SpawnedTask;
7373
use datafusion_datasource::file_format::format_as_file_type;
@@ -305,6 +305,27 @@ async fn select_columns() -> Result<()> {
305305
Ok(())
306306
}
307307

308+
#[tokio::test]
309+
async fn select_columns_with_nonexistent_columns() -> Result<()> {
310+
let t = test_table().await?;
311+
let t2 = t.select_columns(&["canada", "c2", "rocks"]);
312+
313+
match t2 {
314+
Err(DataFusionError::SchemaError(boxed_err, _)) => {
315+
// Verify it's the first invalid column
316+
match boxed_err.as_ref() {
317+
SchemaError::FieldNotFound { field, .. } => {
318+
assert_eq!(field.name(), "canada");
319+
}
320+
_ => panic!("Expected SchemaError::FieldNotFound for 'canada'"),
321+
}
322+
}
323+
_ => panic!("Expected SchemaError"),
324+
}
325+
326+
Ok(())
327+
}
328+
308329
#[tokio::test]
309330
async fn select_expr() -> Result<()> {
310331
// build plan using Table API

0 commit comments

Comments
 (0)