Skip to content

Commit

Permalink
rework default statistics calculation
Browse files Browse the repository at this point in the history
  • Loading branch information
alamb committed Nov 17, 2023
1 parent 03eb51e commit c43c734
Showing 1 changed file with 22 additions and 20 deletions.
42 changes: 22 additions & 20 deletions datafusion/core/src/datasource/listing/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -939,20 +939,23 @@ impl ListingTable {
}))
.await?;
let file_list = stream::iter(file_list).flatten();

// collect the statistics if required by the config
let files = file_list
.map(|part_file| async {
let part_file = part_file?;
let mut statistics_result = Statistics::new_unknown(&self.file_schema);
if self.options.collect_stat {
let (files, statistics) = if !self.options.collect_stat {
(
file_list.try_collect().await?,
Statistics::new_unknown(&self.file_schema),
)
} else {
let files = file_list
.map(|part_file| async {
let part_file = part_file?;
let statistics_cache = self.collected_statistics.clone();
match statistics_cache.get_with_extra(
let statistics_result = match statistics_cache.get_with_extra(
&part_file.object_meta.location,
&part_file.object_meta,
) {
Some(statistics) => {
statistics_result = statistics.as_ref().clone()
}
Some(statistics) => statistics.as_ref().clone(),
None => {
let statistics = self
.options
Expand All @@ -969,18 +972,17 @@ impl ListingTable {
statistics.clone().into(),
&part_file.object_meta,
);
statistics_result = statistics;
statistics
}
}
}
Ok((part_file, statistics_result))
as Result<(PartitionedFile, Statistics)>
})
.boxed()
.buffered(ctx.config_options().execution.meta_fetch_concurrency);

let (files, statistics) =
get_statistics_with_limit(files, self.schema(), limit).await?;
};
Ok((part_file, statistics_result))
as Result<(PartitionedFile, Statistics)>
})
.boxed()
.buffered(ctx.config_options().execution.meta_fetch_concurrency);

get_statistics_with_limit(files, self.schema(), limit).await?
};

Ok((
split_files(files, self.options.target_partitions),
Expand Down

0 comments on commit c43c734

Please sign in to comment.