Skip to content

Commit 0769af6

Browse files
authored
Merge branch 'main' into test-machete
2 parents 0977b19 + 20f1640 commit 0769af6

File tree

8 files changed: 54 additions and 105 deletions.

Cargo.lock

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/common/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@ apache-avro = { version = "0.20", default-features = false, features = [
6060
], optional = true }
6161
arrow = { workspace = true }
6262
arrow-ipc = { workspace = true }
63-
base64 = "0.22.1"
6463
chrono = { workspace = true }
6564
half = { workspace = true }
6665
hashbrown = { workspace = true }

datafusion/common/src/file_options/parquet_writer.rs

Lines changed: 1 addition & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717

1818
//! Options related to how parquet files should be written
1919
20-
use base64::Engine;
2120
use std::sync::Arc;
2221

2322
use crate::{
@@ -26,6 +25,7 @@ use crate::{
2625
};
2726

2827
use arrow::datatypes::Schema;
28+
use parquet::arrow::encode_arrow_schema;
2929
// TODO: handle once deprecated
3030
#[allow(deprecated)]
3131
use parquet::{
@@ -166,31 +166,6 @@ impl TryFrom<&TableParquetOptions> for WriterPropertiesBuilder {
166166
}
167167
}
168168

169-
/// Encodes the Arrow schema into the IPC format, and base64 encodes it
170-
///
171-
/// TODO: use extern parquet's private method, once publicly available.
172-
/// Refer to <https://github.com/apache/arrow-rs/pull/6916>
173-
fn encode_arrow_schema(schema: &Arc<Schema>) -> String {
174-
let options = arrow_ipc::writer::IpcWriteOptions::default();
175-
let mut dictionary_tracker = arrow_ipc::writer::DictionaryTracker::new(true);
176-
let data_gen = arrow_ipc::writer::IpcDataGenerator::default();
177-
let mut serialized_schema = data_gen.schema_to_bytes_with_dictionary_tracker(
178-
schema,
179-
&mut dictionary_tracker,
180-
&options,
181-
);
182-
183-
// manually prepending the length to the schema as arrow uses the legacy IPC format
184-
// TODO: change after addressing ARROW-9777
185-
let schema_len = serialized_schema.ipc_message.len();
186-
let mut len_prefix_schema = Vec::with_capacity(schema_len + 8);
187-
len_prefix_schema.append(&mut vec![255u8, 255, 255, 255]);
188-
len_prefix_schema.append((schema_len as u32).to_le_bytes().to_vec().as_mut());
189-
len_prefix_schema.append(&mut serialized_schema.ipc_message);
190-
191-
base64::prelude::BASE64_STANDARD.encode(&len_prefix_schema)
192-
}
193-
194169
impl ParquetOptions {
195170
/// Convert the global session options, [`ParquetOptions`], into a single write action's [`WriterPropertiesBuilder`].
196171
///

datafusion/core/tests/sql/select.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,3 +344,28 @@ async fn test_version_function() {
344344

345345
assert_eq!(version.value(0), expected_version);
346346
}
347+
348+
/// Regression test for https://github.com/apache/datafusion/issues/17513
349+
/// See https://github.com/apache/datafusion/pull/17520
350+
#[tokio::test]
351+
async fn test_select_no_projection() -> Result<()> {
352+
let tmp_dir = TempDir::new()?;
353+
// `create_ctx_with_partition` creates 10 rows per partition and we chose 1 partition
354+
let ctx = create_ctx_with_partition(&tmp_dir, 1).await?;
355+
356+
let results = ctx.sql("SELECT FROM test").await?.collect().await?;
357+
// We should get all of the rows, just without any columns
358+
let total_rows: usize = results.iter().map(|b| b.num_rows()).sum();
359+
assert_eq!(total_rows, 10);
360+
// Check that none of the batches have any columns
361+
for batch in &results {
362+
assert_eq!(batch.num_columns(), 0);
363+
}
364+
// Sanity check the output, should be just empty columns
365+
assert_snapshot!(batches_to_sort_string(&results), @r"
366+
++
367+
++
368+
++
369+
");
370+
Ok(())
371+
}

datafusion/sql/src/select.rs

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -676,14 +676,6 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
676676
let mut prepared_select_exprs = vec![];
677677
let mut error_builder = DataFusionErrorBuilder::new();
678678

679-
// Handle the case where no projection is specified but we have a valid FROM clause
680-
// In this case, implicitly add a wildcard projection (SELECT *)
681-
let projection = if projection.is_empty() && !empty_from {
682-
vec![SelectItem::Wildcard(WildcardAdditionalOptions::default())]
683-
} else {
684-
projection
685-
};
686-
687679
for expr in projection {
688680
match self.sql_select_to_rex(expr, plan, empty_from, planner_context) {
689681
Ok(expr) => prepared_select_exprs.push(expr),

datafusion/sqllogictest/test_files/from-first.slt

Lines changed: 0 additions & 55 deletions
This file was deleted.

datafusion/sqllogictest/test_files/projection.slt

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,3 +252,31 @@ physical_plan
252252

253253
statement ok
254254
drop table t;
255+
256+
# Regression test for
257+
# https://github.com/apache/datafusion/issues/17513
258+
259+
query I
260+
COPY (select 1 as a, 2 as b)
261+
TO 'test_files/scratch/projection/17513.parquet'
262+
STORED AS PARQUET;
263+
----
264+
1
265+
266+
statement ok
267+
create external table t1 stored as parquet location 'test_files/scratch/projection/17513.parquet';
268+
269+
query TT
270+
explain format indent
271+
select from t1 where t1.a > 1;
272+
----
273+
logical_plan
274+
01)Projection:
275+
02)--Filter: t1.a > Int64(1)
276+
03)----TableScan: t1 projection=[a], partial_filters=[t1.a > Int64(1)]
277+
physical_plan
278+
01)ProjectionExec: expr=[]
279+
02)--CoalesceBatchesExec: target_batch_size=8192
280+
03)----FilterExec: a@0 > 1
281+
04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
282+
05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/projection/17513.parquet]]}, projection=[a], file_type=parquet, predicate=a@0 > 1, pruning_predicate=a_null_count@1 != row_count@2 AND a_max@0 > 1, required_guarantees=[]

docs/source/user-guide/sql/select.md

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -75,20 +75,6 @@ Example:
7575
SELECT t.a FROM table AS t
7676
```
7777

78-
The `FROM` clause can also come before the `SELECT` clause.
79-
Example:
80-
81-
```sql
82-
FROM table AS t
83-
SELECT t.a
84-
```
85-
86-
If the `SELECT` clause is omitted, the `FROM` clause will return all columns from the table.
87-
88-
```sql
89-
FROM table
90-
```
91-
9278
## WHERE clause
9379

9480
Example:

0 commit comments

Comments
 (0)