Improve main api doc page, move avro_to_arrow to datasource (#6564)
* Improve main api doc page

* fix doc examples

* fmt
alamb authored Jun 12, 2023
1 parent 2467226 commit b8f90fe
Showing 17 changed files with 42 additions and 31 deletions.
2 changes: 1 addition & 1 deletion datafusion/common/src/config.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

-//! DataFusion Configuration Options
+//! Runtime configuration, via [`ConfigOptions`]
use crate::{DataFusionError, Result};
use std::any::Any;
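For context on the `ConfigOptions` referenced by the new doc line, here is a minimal sketch (not part of this commit) of overriding a setting by its string key; the key name and typed fields shown follow the crate layout around this release and may differ in others:

```rust
use datafusion::config::ConfigOptions;
use datafusion::error::Result;

fn main() -> Result<()> {
    // Start from the defaults, then override a setting by its string key
    let mut options = ConfigOptions::new();
    options.set("datafusion.execution.batch_size", "1024")?;

    // The same setting is also reachable through the typed fields
    assert_eq!(options.execution.batch_size, 1024);
    Ok(())
}
```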
3 changes: 1 addition & 2 deletions datafusion/core/src/catalog/mod.rs
@@ -15,8 +15,7 @@
// specific language governing permissions and limitations
// under the License.

-//! This module contains interfaces and default implementations
-//! of table namespacing concepts, including catalogs and schemas.
+//! Interfaces and default implementations of catalogs and schemas.
// TODO(clippy): Having a `catalog::catalog` module path is unclear and ambiguous.
// The parent module should probably be renamed to something that more accurately
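To make the catalog and schema interfaces concrete, a hedged sketch (not part of this commit) of registering an in-memory catalog with a `SessionContext`; the `MemoryCatalogProvider`/`MemorySchemaProvider` import paths have moved between releases, so treat them as approximate:

```rust
use std::sync::Arc;

use datafusion::catalog::catalog::{CatalogProvider, MemoryCatalogProvider};
use datafusion::catalog::schema::MemorySchemaProvider;
use datafusion::error::Result;
use datafusion::prelude::SessionContext;

fn main() -> Result<()> {
    let ctx = SessionContext::new();

    // An in-memory catalog holding one (empty) schema
    let catalog = MemoryCatalogProvider::new();
    catalog.register_schema("my_schema", Arc::new(MemorySchemaProvider::new()))?;

    // Tables registered under my_catalog.my_schema become visible to SQL queries
    ctx.register_catalog("my_catalog", Arc::new(catalog));
    Ok(())
}
```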
2 changes: 1 addition & 1 deletion datafusion/core/src/dataframe.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

-//! DataFrame API for building and executing query plans.
+//! [`DataFrame`] API for building and executing query plans.
use std::any::Any;
use std::sync::Arc;
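As a reminder of the API behind the updated doc line, a short `DataFrame` example (not part of this commit); the CSV path and column names are placeholders:

```rust
use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();

    // "example.csv" is a placeholder; any CSV with columns `a` and `b` works
    let df = ctx
        .read_csv("example.csv", CsvReadOptions::new())
        .await?
        .filter(col("a").gt(lit(10)))?
        .select(vec![col("a"), col("b")])?;

    df.show().await?;
    Ok(())
}
```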
@@ -957,7 +957,7 @@ where
mod test {
use crate::arrow::array::Array;
use crate::arrow::datatypes::{Field, TimeUnit};
-use crate::avro_to_arrow::{Reader, ReaderBuilder};
+use crate::datasource::avro_to_arrow::{Reader, ReaderBuilder};
use arrow::datatypes::DataType;
use datafusion_common::cast::{
as_int32_array, as_int64_array, as_list_array, as_timestamp_microsecond_array,
@@ -15,7 +15,9 @@
// specific language governing permissions and limitations
// under the License.

-//! This module contains utilities to manipulate avro metadata.
+//! This module contains code for reading [Avro] data into `RecordBatch`es
+//!
+//! [Avro]: https://avro.apache.org/docs/1.2.0/
#[cfg(feature = "avro")]
mod arrow_array_reader;
@@ -56,17 +56,21 @@ impl ReaderBuilder {
/// # Example
///
/// ```
-/// extern crate apache_avro;
-///
/// use std::fs::File;
///
-/// fn example() -> crate::datafusion::avro_to_arrow::Reader<'static, File> {
+/// use datafusion::datasource::avro_to_arrow::{Reader, ReaderBuilder};
+///
+/// fn example() -> Reader<'static, File> {
///     let file = File::open("test/data/basic.avro").unwrap();
///
///     // create a builder, inferring the schema with the first 100 records
-///     let builder = crate::datafusion::avro_to_arrow::ReaderBuilder::new().read_schema().with_batch_size(100);
+///     let builder = ReaderBuilder::new()
+///         .read_schema()
+///         .with_batch_size(100);
///
-///     let reader = builder.build::<File>(file).unwrap();
+///     let reader = builder
+///         .build::<File>(file)
+///         .unwrap();
///
///     reader
/// }
File renamed without changes.
2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/file_format/avro.rs
@@ -27,7 +27,7 @@ use datafusion_physical_expr::PhysicalExpr;
use object_store::{GetResult, ObjectMeta, ObjectStore};

use super::FileFormat;
-use crate::avro_to_arrow::read_avro_schema_from_reader;
+use crate::datasource::avro_to_arrow::read_avro_schema_from_reader;
use crate::datasource::physical_plan::{AvroExec, FileScanConfig};
use crate::error::Result;
use crate::execution::context::SessionState;
5 changes: 4 additions & 1 deletion datafusion/core/src/datasource/mod.rs
@@ -15,12 +15,15 @@
// specific language governing permissions and limitations
// under the License.

-//! DataFusion data sources
+//! DataFusion data sources: [`TableProvider`] and [`ListingTable`]
+//!
+//! [`ListingTable`]: crate::datasource::listing::ListingTable
// TODO(clippy): Having a `datasource::datasource` module path is unclear and ambiguous.
// The child module should probably be renamed to something that more accurately
// describes its content. Something along the lines of `provider`, or `providers`.
#![allow(clippy::module_inception)]
+pub mod avro_to_arrow;
pub mod datasource;
pub mod default_table_source;
pub mod empty;
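To ground the `TableProvider` reference, a small sketch (not part of this commit) that registers an in-memory `MemTable`, one of the built-in `TableProvider` implementations, and queries it; the table and column names are made up:

```rust
use std::sync::Arc;

use datafusion::arrow::array::Int32Array;
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::datasource::MemTable;
use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    // One partition containing a single RecordBatch
    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
    let batch = RecordBatch::try_new(
        schema.clone(),
        vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
    )?;
    let table = MemTable::try_new(schema, vec![vec![batch]])?;

    // Register the provider and query it through SQL
    let ctx = SessionContext::new();
    ctx.register_table("t", Arc::new(table))?;
    ctx.sql("SELECT a FROM t").await?.show().await?;
    Ok(())
}
```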
8 changes: 3 additions & 5 deletions datafusion/core/src/datasource/physical_plan/avro.rs
@@ -165,6 +165,7 @@ impl ExecutionPlan for AvroExec {
#[cfg(feature = "avro")]
mod private {
use super::*;
+use crate::datasource::avro_to_arrow::Reader as AvroReader;
use crate::datasource::physical_plan::file_stream::{FileOpenFuture, FileOpener};
use crate::datasource::physical_plan::FileMeta;
use bytes::Buf;
@@ -179,11 +180,8 @@ mod private {
}

impl AvroConfig {
-fn open<R: std::io::Read>(
-    &self,
-    reader: R,
-) -> Result<crate::avro_to_arrow::Reader<'static, R>> {
-    crate::avro_to_arrow::Reader::try_new(
+fn open<R: std::io::Read>(&self, reader: R) -> Result<AvroReader<'static, R>> {
+    AvroReader::try_new(
reader,
self.schema.clone(),
self.batch_size,
2 changes: 1 addition & 1 deletion datafusion/core/src/error.rs
@@ -15,5 +15,5 @@
// specific language governing permissions and limitations
// under the License.

-//! DataFusion error types
+//! DataFusion error type [`DataFusionError`] and [`Result`].
pub use datafusion_common::{DataFusionError, Result, SharedResult};
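A brief illustration of the re-exported error type (not part of this commit); `Plan` is one of several `DataFusionError` variants and is chosen here only for the example:

```rust
use datafusion::error::{DataFusionError, Result};

// Fallible helpers typically return DataFusion's Result alias
fn require_positive(x: i64) -> Result<i64> {
    if x <= 0 {
        return Err(DataFusionError::Plan(format!(
            "expected a positive value, got {x}"
        )));
    }
    Ok(x)
}

fn main() {
    assert!(require_positive(-1).is_err());
    assert_eq!(require_positive(2).unwrap(), 2);
}
```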
7 changes: 4 additions & 3 deletions datafusion/core/src/lib.rs
@@ -384,11 +384,12 @@
//! and improve compilation times. The crates are:
//!
//! * [datafusion_common]: Common traits and types
-//! * [datafusion_execution]: State needed for execution
//! * [datafusion_expr]: [`LogicalPlan`], [`Expr`] and related logical planning structure
+//! * [datafusion_execution]: State and structures needed for execution
//! * [datafusion_optimizer]: [`OptimizerRule`]s and [`AnalyzerRule`]s
//! * [datafusion_physical_expr]: [`PhysicalExpr`] and related expressions
-//! * [datafusion_sql]: [`SqlToRel`] SQL planner
//! * [datafusion_row]: Row based representation
+//! * [datafusion_sql]: SQL planner ([`SqlToRel`])
+//!
//! [sqlparser]: https://docs.rs/sqlparser/latest/sqlparser
//! [`SqlToRel`]: sql::planner::SqlToRel
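Each subcrate is re-exported from the top-level `datafusion` crate (see the `pub use` lines later in this file), so most users depend on `datafusion` alone; a hedged sketch of reaching subcrate items through those re-exports:

```rust
// Items from the subcrates are reachable through the `datafusion` facade
use datafusion::common::ScalarValue;      // re-export of datafusion_common
use datafusion::logical_expr::{col, lit}; // re-export of datafusion_expr

fn main() {
    // Build a logical expression without depending on the subcrates directly
    let predicate = col("a").gt(lit(ScalarValue::Int32(Some(42))));
    println!("{predicate}");
}
```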
@@ -412,7 +413,6 @@ pub const DATAFUSION_VERSION: &str = env!("CARGO_PKG_VERSION");
extern crate core;
extern crate sqlparser;

-pub mod avro_to_arrow;
pub mod catalog;
pub mod dataframe;
pub mod datasource;
@@ -431,6 +431,7 @@ pub use parquet;
// re-export DataFusion crates
pub use datafusion_common as common;
pub use datafusion_common::config;
+pub use datafusion_execution;
pub use datafusion_expr as logical_expr;
pub use datafusion_optimizer as optimizer;
pub use datafusion_physical_expr as physical_expr;
9 changes: 6 additions & 3 deletions datafusion/core/src/physical_optimizer/mod.rs
@@ -15,9 +15,12 @@
// specific language governing permissions and limitations
// under the License.

-//! This module contains a query optimizer that operates against a physical plan and applies
-//! rules to a physical plan, such as "Repartition".
+//! Optimizer that rewrites [`ExecutionPlan`]s.
+//!
+//! These rules take advantage of physical plan properties, such as
+//! "Repartition" or "Sortedness"
+//!
+//! [`ExecutionPlan`]: crate::physical_plan::ExecutionPlan
pub mod aggregate_statistics;
pub mod coalesce_batches;
pub mod combine_partial_final_agg;
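For a sense of the shape of these rules, a hedged sketch of a do-nothing `PhysicalOptimizerRule` (not part of this commit); the trait path and the `optimize`/`name`/`schema_check` signatures reflect roughly this release and have lived under `physical_optimizer::optimizer` in some versions:

```rust
use std::sync::Arc;

use datafusion::config::ConfigOptions;
use datafusion::error::Result;
use datafusion::physical_optimizer::PhysicalOptimizerRule;
use datafusion::physical_plan::ExecutionPlan;

/// Returns the plan unchanged; a real rule would rewrite it based on
/// physical properties such as partitioning or sort order.
#[derive(Debug)]
struct NoOpRule;

impl PhysicalOptimizerRule for NoOpRule {
    fn optimize(
        &self,
        plan: Arc<dyn ExecutionPlan>,
        _config: &ConfigOptions,
    ) -> Result<Arc<dyn ExecutionPlan>> {
        Ok(plan)
    }

    fn name(&self) -> &str {
        "no_op"
    }

    fn schema_check(&self) -> bool {
        true
    }
}

fn main() {}
```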
2 changes: 1 addition & 1 deletion datafusion/core/src/prelude.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

//! A "prelude" for users of the datafusion crate.
//! DataFusion "prelude" to simplify importing common types.
//!
//! Like the standard library's prelude, this module simplifies importing of
//! common items. Unlike the standard prelude, the contents of this module must
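For reference, the prelude is normally pulled in with a glob import; a minimal end-to-end use (not part of this commit):

```rust
// SessionContext and friends all come from the prelude glob import
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> datafusion::error::Result<()> {
    let ctx = SessionContext::new();
    let df = ctx.sql("SELECT 1 AS one").await?;
    df.show().await?;
    Ok(())
}
```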
7 changes: 4 additions & 3 deletions datafusion/core/src/scalar.rs
@@ -15,7 +15,8 @@
// specific language governing permissions and limitations
// under the License.

-//! ScalarValue reimported from datafusion-common to easy migration
-//! when datafusion was split into several different crates
+//! [`ScalarValue`] single value representation.
+//!
+//! Note this is reimported from the datafusion-common crate for easy
+//! migration when datafusion was split into several different crates
pub use datafusion_common::{ScalarType, ScalarValue};
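To show the re-exported type in use, a small sketch (not part of this commit) of constructing and inspecting `ScalarValue`s:

```rust
use datafusion::scalar::ScalarValue;

fn main() {
    // A ScalarValue is a single, possibly NULL, value of a concrete type
    let int = ScalarValue::Int32(Some(1));
    let null_int = ScalarValue::Int32(None);
    let text = ScalarValue::from("hello");

    assert!(!int.is_null());
    assert!(null_int.is_null());
    assert_eq!(text, ScalarValue::Utf8(Some("hello".to_string())));
}
```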
2 changes: 1 addition & 1 deletion datafusion/core/src/variable/mod.rs
@@ -15,6 +15,6 @@
// specific language governing permissions and limitations
// under the License.

-//! Variable provider
+//! Variable provider for `@name` and `@@name` style runtime values.
pub use datafusion_physical_expr::var_provider::{VarProvider, VarType};
2 changes: 1 addition & 1 deletion datafusion/physical-expr/src/var_provider.rs
@@ -29,7 +29,7 @@ pub enum VarType {
UserDefined,
}

-/// A var provider for @variable
+/// A var provider for `@variable` and `@@variable` runtime values.
pub trait VarProvider: std::fmt::Debug {
/// Get variable value
fn get_value(&self, var_names: Vec<String>) -> Result<ScalarValue>;
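Since this hunk shows the trait itself, here is a hedged sketch of an implementation (not part of this commit); `get_value` is visible above, while the `get_type` signature is assumed to match this release:

```rust
use arrow::datatypes::DataType;
use datafusion_common::{Result, ScalarValue};
use datafusion_physical_expr::var_provider::VarProvider;

/// Resolves `@name` style variables to a constant, regardless of the name.
#[derive(Debug)]
struct ConstantVarProvider;

impl VarProvider for ConstantVarProvider {
    fn get_value(&self, _var_names: Vec<String>) -> Result<ScalarValue> {
        Ok(ScalarValue::Int64(Some(42)))
    }

    fn get_type(&self, _var_names: &[String]) -> Option<DataType> {
        Some(DataType::Int64)
    }
}

fn main() {
    let provider = ConstantVarProvider;
    let value = provider.get_value(vec!["answer".to_string()]).unwrap();
    assert_eq!(value, ScalarValue::Int64(Some(42)));
}
```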