apache · cj-zhukov · Oct 26, 2025 · Oct 27, 2025 · Oct 27, 2025 · Oct 27, 2025
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml
@@ -32,30 +32,6 @@ rust-version = { workspace = true }
 [lints]
 workspace = true
 
-[[example]]
-name = "flight_sql_server"
-path = "examples/flight/flight_sql_server.rs"
-
-[[example]]
-name = "flight_server"
-path = "examples/flight/flight_server.rs"
-
-[[example]]
-name = "flight_client"
-path = "examples/flight/flight_client.rs"
-
-[[example]]
-name = "dataframe_to_s3"
-path = "examples/external_dependency/dataframe-to-s3.rs"
-
-[[example]]
-name = "query_aws_s3"
-path = "examples/external_dependency/query-aws-s3.rs"
-
-[[example]]
-name = "custom_file_casts"
-path = "examples/custom_file_casts.rs"
-
 [dev-dependencies]
 arrow = { workspace = true }
 # arrow_schema is required for record_batch! macro :sad:
@@ -67,7 +43,6 @@ dashmap = { workspace = true }
 # note only use main datafusion crate for examples
 base64 = "0.22.1"
 datafusion = { workspace = true, default-features = true, features = ["parquet_encryption"] }
-datafusion-ffi = { workspace = true }
 datafusion-physical-expr-adapter = { workspace = true }
 datafusion-proto = { workspace = true }
 env_logger = { workspace = true }

diff --git a/datafusion-examples/README.md b/datafusion-examples/README.md
@@ -40,58 +40,61 @@ git submodule update --init
 cd datafusion-examples/examples
 
 # Run the `dataframe` example:
-# ... use the equivalent for other examples
-cargo run --example dataframe
+# ... use the equivalent for other examples with corresponding subcommand
+cargo run --example dataframe -- dataframe
 ```
 
 ## Single Process
 
-- [`advanced_udaf.rs`](examples/advanced_udaf.rs): Define and invoke a more complicated User Defined Aggregate Function (UDAF)
-- [`advanced_udf.rs`](examples/advanced_udf.rs): Define and invoke a more complicated User Defined Scalar Function (UDF)
-- [`advanced_udwf.rs`](examples/advanced_udwf.rs): Define and invoke a more complicated User Defined Window Function (UDWF)
-- [`advanced_parquet_index.rs`](examples/advanced_parquet_index.rs): Creates a detailed secondary index that covers the contents of several parquet files
-- [`async_udf.rs`](examples/async_udf.rs): Define and invoke an asynchronous User Defined Scalar Function (UDF)
-- [`analyzer_rule.rs`](examples/analyzer_rule.rs): Use a custom AnalyzerRule to change a query's semantics (row level access control)
-- [`catalog.rs`](examples/catalog.rs): Register the table into a custom catalog
-- [`composed_extension_codec`](examples/composed_extension_codec.rs): Example of using multiple extension codecs for serialization / deserialization
-- [`csv_sql_streaming.rs`](examples/csv_sql_streaming.rs): Build and run a streaming query plan from a SQL statement against a local CSV file
-- [`csv_json_opener.rs`](examples/csv_json_opener.rs): Use low level `FileOpener` APIs to read CSV/JSON into Arrow `RecordBatch`es
-- [`custom_datasource.rs`](examples/custom_datasource.rs): Run queries against a custom datasource (TableProvider)
-- [`custom_file_casts.rs`](examples/custom_file_casts.rs): Implement custom casting rules to adapt file schemas
-- [`custom_file_format.rs`](examples/custom_file_format.rs): Write data to a custom file format
-- [`dataframe-to-s3.rs`](examples/external_dependency/dataframe-to-s3.rs): Run a query using a DataFrame against a parquet file from s3 and writing back to s3
-- [`dataframe.rs`](examples/dataframe.rs): Run a query using a DataFrame API against parquet files, csv files, and in-memory data, including multiple subqueries. Also demonstrates the various methods to write out a DataFrame to a table, parquet file, csv file, and json file.
-- [`default_column_values.rs`](examples/default_column_values.rs): Implement custom default value handling for missing columns using field metadata and PhysicalExprAdapter
-- [`deserialize_to_struct.rs`](examples/deserialize_to_struct.rs): Convert query results (Arrow ArrayRefs) into Rust structs
-- [`expr_api.rs`](examples/expr_api.rs): Create, execute, simplify, analyze and coerce `Expr`s
-- [`file_stream_provider.rs`](examples/file_stream_provider.rs): Run a query on `FileStreamProvider` which implements `StreamProvider` for reading and writing to arbitrary stream sources / sinks.
-- [`flight_sql_server.rs`](examples/flight/flight_sql_server.rs): Run DataFusion as a standalone process and execute SQL queries from JDBC clients
-- [`function_factory.rs`](examples/function_factory.rs): Register `CREATE FUNCTION` handler to implement SQL macros
-- [`memory_pool_tracking.rs`](examples/memory_pool_tracking.rs): Demonstrates TrackConsumersPool for memory tracking and debugging with enhanced error messages
-- [`memory_pool_execution_plan.rs`](examples/memory_pool_execution_plan.rs): Shows how to implement memory-aware ExecutionPlan with memory reservation and spilling
-- [`optimizer_rule.rs`](examples/optimizer_rule.rs): Use a custom OptimizerRule to replace certain predicates
-- [`parquet_embedded_index.rs`](examples/parquet_embedded_index.rs): Store a custom index inside a Parquet file and use it to speed up queries
-- [`parquet_encrypted.rs`](examples/parquet_encrypted.rs): Read and write encrypted Parquet files using DataFusion
-- [`parquet_encrypted_with_kms.rs`](examples/parquet_encrypted_with_kms.rs): Read and write encrypted Parquet files using an encryption factory
-- [`parquet_index.rs`](examples/parquet_index.rs): Create an secondary index over several parquet files and use it to speed up queries
-- [`parquet_exec_visitor.rs`](examples/parquet_exec_visitor.rs): Extract statistics by visiting an ExecutionPlan after execution
-- [`parse_sql_expr.rs`](examples/parse_sql_expr.rs): Parse SQL text into DataFusion `Expr`.
-- [`plan_to_sql.rs`](examples/plan_to_sql.rs): Generate SQL from DataFusion `Expr` and `LogicalPlan`
-- [`planner_api.rs`](examples/planner_api.rs) APIs to manipulate logical and physical plans
-- [`pruning.rs`](examples/pruning.rs): Use pruning to rule out files based on statistics
-- [`query-aws-s3.rs`](examples/external_dependency/query-aws-s3.rs): Configure `object_store` and run a query against files stored in AWS S3
-- [`query-http-csv.rs`](examples/query-http-csv.rs): Configure `object_store` and run a query against files vi HTTP
-- [`regexp.rs`](examples/regexp.rs): Examples of using regular expression functions
-- [`remote_catalog.rs`](examples/regexp.rs): Examples of interfacing with a remote catalog (e.g. over a network)
-- [`simple_udaf.rs`](examples/simple_udaf.rs): Define and invoke a User Defined Aggregate Function (UDAF)
-- [`simple_udf.rs`](examples/simple_udf.rs): Define and invoke a User Defined Scalar Function (UDF)
-- [`simple_udfw.rs`](examples/simple_udwf.rs): Define and invoke a User Defined Window Function (UDWF)
-- [`sql_analysis.rs`](examples/sql_analysis.rs): Analyse SQL queries with DataFusion structures
-- [`sql_frontend.rs`](examples/sql_frontend.rs): Create LogicalPlans (only) from sql strings
-- [`sql_dialect.rs`](examples/sql_dialect.rs): Example of implementing a custom SQL dialect on top of `DFParser`
-- [`sql_query.rs`](examples/memtable.rs): Query data using SQL (in memory `RecordBatches`, local Parquet files)
-- [`date_time_function.rs`](examples/date_time_function.rs): Examples of date-time related functions and queries.
+- [`advanced_udaf.rs`](examples/advanced_udaf/udaf.rs): Define and invoke a more complicated User Defined Aggregate Function (UDAF)
+- [`advanced_udf.rs`](examples/advanced_udaf/udf.rs): Define and invoke a more complicated User Defined Scalar Function (UDF)
+- [`advanced_udwf.rs`](examples/advanced_udaf/udwf.rs): Define and invoke a more complicated User Defined Window Function (UDWF)
+- [`advanced_parquet_index.rs`](examples/parquet/advanced_index.rs): Create a detailed secondary index that covers the contents of several parquet files
+- [`async_udf.rs`](examples/advanced_udaf/async_udf.rs): Define and invoke an asynchronous User Defined Scalar Function (UDF)
+- [`analyzer_rule.rs`](examples/query_planning/analyzer_rule.rs): Use a custom AnalyzerRule to change a query's semantics (row level access control)
+- [`catalog.rs`](examples/data_io/catalog.rs): Register the table into a custom catalog
+- [`composed_extension_codec`](examples/proto/composed_extension_codec.rs): Example of using multiple extension codecs for serialization / deserialization
+- [`csv_sql_streaming.rs`](examples/custom_data_source/csv_sql_streaming.rs): Build and run a streaming query plan from a SQL statement against a local CSV file
+- [`csv_json_opener.rs`](examples/custom_data_source/csv_json_opener.rs): Use low level `FileOpener` APIs to read CSV/JSON into Arrow `RecordBatch`es
+- [`custom_datasource.rs`](examples/custom_data_source/custom_datasource.rs): Run queries against a custom datasource (TableProvider)
+- [`custom_file_casts.rs`](examples/custom_data_source/custom_file_casts.rs): Implement custom casting rules to adapt file schemas
+- [`custom_file_format.rs`](examples/custom_data_source/custom_file_format.rs): Write data to a custom file format
+- [`dataframe_to_s3.rs`](examples/external_dependency/dataframe_to_s3.rs): Run a query using a DataFrame against a parquet file from s3 and writing back to s3
+- [`dataframe.rs`](examples/dataframe/dataframe.rs): Run a query using a DataFrame API against parquet files, csv files, and in-memory data, including multiple subqueries. Also demonstrates the various methods to write out a DataFrame to a table, parquet file, csv file, and json file.
+- [`date_time_function.rs`](examples/date_time_function.rs): Examples of date-time related functions and queries
+- [`default_column_values.rs`](examples/dataframe/default_column_values.rs): Implement custom default value handling for missing columns using field metadata and PhysicalExprAdapter
+- [`deserialize_to_struct.rs`](examples/dataframe/deserialize_to_struct.rs): Convert query results (Arrow ArrayRefs) into Rust structs
+- [`expr_api.rs`](examples/query_planning/expr_api.rs): Create, execute, simplify, analyze and coerce `Expr`s
+- [`file_stream_provider.rs`](examples/custom_data_source/file_stream_provider.rs): Run a query on `FileStreamProvider` which implements `StreamProvider` for reading and writing to arbitrary stream sources / sinks.
+- [`flight_sql_server.rs`](examples/flight/sql_server.rs): Run DataFusion as a standalone process and execute SQL queries from JDBC clients
+- [`function_factory.rs`](examples/builtin_functions/function_factory.rs): Register `CREATE FUNCTION` handler to implement SQL macros
+- [`json_shredding.rs`](examples/data_io/json_shredding.rs): Shows how to implement custom filter rewriting for JSON shredding
+- [`memory_pool_tracking.rs`](examples/execution_monitoring/memory_pool_tracking.rs): Demonstrates TrackConsumersPool for memory tracking and debugging with enhanced error messages
+- [`memory_pool_execution_plan.rs`](examples/execution_monitoring/memory_pool_execution_plan.rs): Shows how to implement memory-aware ExecutionPlan with memory reservation and spilling
+- [`optimizer_rule.rs`](examples/query_planning/optimizer_rule.rs): Use a custom OptimizerRule to replace certain predicates
+- [`parquet_embedded_index.rs`](examples/parquet/embedded_index.rs): Store a custom index inside a Parquet file and use it to speed up queries
+- [`parquet_encrypted.rs`](examples/parquet/encrypted.rs): Read and write encrypted Parquet files using DataFusion
+- [`parquet_encrypted_with_kms.rs`](examples/parquet/encrypted_with_kms.rs): Read and write encrypted Parquet files using an encryption factory
+- [`parquet_index.rs`](examples/parquet/index.rs): Create an secondary index over several Parquet files and use it to speed up queries
+- [`parquet_exec_visitor.rs`](examples/parquet/exec_visitor.rs): Extract statistics by visiting an ExecutionPlan after execution
+- [`parse_sql_expr.rs`](examples/query_planning/parse_sql_expr.rs): Parse SQL text into DataFusion `Expr`.
+- [`plan_to_sql.rs`](examples/query_planning/plan_to_sql.rs): Generate SQL from DataFusion `Expr` and `LogicalPlan`
+- [`planner_api.rs`](examples/query_planning/planner_api.rs) APIs to manipulate logical and physical plans
+- [`pruning.rs`](examples/query_planning/pruning.rs): Use pruning to rule out files based on statistics
+- [`query_aws_s3.rs`](examples/external_dependency/query_aws_s3.rs): Configure `object_store` and run a query against files stored in AWS S3
+- [`query_http_csv.rs`](examples/data_io/query_http_csv.rs): Configure `object_store` and run a query against files via HTTP
+- [`regexp.rs`](examples/builtin_functions/regexp.rs): Examples of using regular expression functions
+- [`remote_catalog.rs`](examples/data_io/remote_catalog.rs): Examples of interfacing with a remote catalog (e.g. over a network)
+- [`simple_udaf.rs`](examples/simple_udf/udaf.rs): Define and invoke a User Defined Aggregate Function (UDAF)
+- [`simple_udf.rs`](examples/simple_udf/udf.rs): Define and invoke a User Defined Scalar Function (UDF)
+- [`simple_udfw.rs`](examples/simple_udf/udwf.rs): Define and invoke a User Defined Window Function (UDWF)
+- [`simple_udtf.rs`](examples/simple_udf/udtf.rs): Define and invoke a User Defined Table Function (UDTF)
+- [`sql_analysis.rs`](examples/sql_ops/analysis.rs): Analyse SQL queries with DataFusion structures
+- [`sql_frontend.rs`](examples/sql_ops/frontend.rs): Create LogicalPlans (only) from sql strings
+- [`sql_dialect.rs`](examples/sql_ops/dialect.rs): Example of implementing a custom SQL dialect on top of `DFParser`
+- [`sql_query.rs`](examples/sql_ops/query.rs): Query data using SQL (in memory `RecordBatches`, local Parquet files)
+- [`tracing.rs`](examples/execution_monitoring/tracing.rs): Demonstrates the tracing injection feature for the DataFusion runtime
 
 ## Distributed
 
-- [`flight_client.rs`](examples/flight/flight_client.rs) and [`flight_server.rs`](examples/flight/flight_server.rs): Run DataFusion as a standalone process and execute SQL queries from a client using the Flight protocol.
+- [`flight_client.rs`](examples/flight/client.rs) and [`flight_server.rs`](examples/flight/server.rs): Run DataFusion as a standalone process and execute SQL queries from a client using the Flight protocol.