diff --git a/docs/.gitignore b/docs/.gitignore
index 765c378eb3b9..baf488338166 100644
--- a/docs/.gitignore
+++ b/docs/.gitignore
@@ -16,5 +16,4 @@
# under the License.
diff --git a/docs/requirements.txt b/docs/requirements.txt
index d81b90e3c77a..24546d59a45a 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -15,7 +15,6 @@
# specific language governing permissions and limitations
# under the License.
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 30d21aa8e6d6..0d507fcbd003 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -31,8 +31,6 @@
# import sys
# sys.path.insert(0, os.path.abspath('.'))
-import datafusion
# -- Project information -----------------------------------------------------
project = 'Arrow DataFusion'
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 76ffe8ecd439..e0b432985462 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -29,7 +29,6 @@ Table of Contents
:caption: Supported Environments
- Python
Command line
.. _toc.guide:
diff --git a/docs/source/python/api.rst b/docs/source/python/api.rst
deleted file mode 100644
index f81753e082e4..000000000000
--- a/docs/source/python/api.rst
+++ /dev/null
@@ -1,30 +0,0 @@
-.. Licensed to the Apache Software Foundation (ASF) under one
-.. or more contributor license agreements. See the NOTICE file
-.. distributed with this work for additional information
-.. regarding copyright ownership. The ASF licenses this file
-.. to you under the Apache License, Version 2.0 (the
-.. "License"); you may not use this file except in compliance
-.. with the License. You may obtain a copy of the License at
-.. http://www.apache.org/licenses/LICENSE-2.0
-.. Unless required by applicable law or agreed to in writing,
-.. software distributed under the License is distributed on an
-.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-.. KIND, either express or implied. See the License for the
-.. specific language governing permissions and limitations
-.. under the License.
-.. _api:
-API Reference
-.. toctree::
- :maxdepth: 2
- api/dataframe
- api/execution_context
- api/expression
- api/functions
diff --git a/docs/source/python/api/dataframe.rst b/docs/source/python/api/dataframe.rst
deleted file mode 100644
index 0a3c4c8b1c34..000000000000
--- a/docs/source/python/api/dataframe.rst
+++ /dev/null
@@ -1,27 +0,0 @@
-.. Licensed to the Apache Software Foundation (ASF) under one
-.. or more contributor license agreements. See the NOTICE file
-.. distributed with this work for additional information
-.. regarding copyright ownership. The ASF licenses this file
-.. to you under the Apache License, Version 2.0 (the
-.. "License"); you may not use this file except in compliance
-.. with the License. You may obtain a copy of the License at
-.. http://www.apache.org/licenses/LICENSE-2.0
-.. Unless required by applicable law or agreed to in writing,
-.. software distributed under the License is distributed on an
-.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-.. KIND, either express or implied. See the License for the
-.. specific language governing permissions and limitations
-.. under the License.
-.. _api.dataframe:
-.. currentmodule:: datafusion
-.. autosummary::
- :toctree: ../generated/
- DataFrame
diff --git a/docs/source/python/api/execution_context.rst b/docs/source/python/api/execution_context.rst
deleted file mode 100644
index 5b7e0f82f996..000000000000
--- a/docs/source/python/api/execution_context.rst
+++ /dev/null
@@ -1,27 +0,0 @@
-.. Licensed to the Apache Software Foundation (ASF) under one
-.. or more contributor license agreements. See the NOTICE file
-.. distributed with this work for additional information
-.. regarding copyright ownership. The ASF licenses this file
-.. to you under the Apache License, Version 2.0 (the
-.. "License"); you may not use this file except in compliance
-.. with the License. You may obtain a copy of the License at
-.. http://www.apache.org/licenses/LICENSE-2.0
-.. Unless required by applicable law or agreed to in writing,
-.. software distributed under the License is distributed on an
-.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-.. KIND, either express or implied. See the License for the
-.. specific language governing permissions and limitations
-.. under the License.
-.. _api.execution_context:
-.. currentmodule:: datafusion
-.. autosummary::
- :toctree: ../generated/
- SessionContext
diff --git a/docs/source/python/api/expression.rst b/docs/source/python/api/expression.rst
deleted file mode 100644
index 45923fb5447f..000000000000
--- a/docs/source/python/api/expression.rst
+++ /dev/null
@@ -1,27 +0,0 @@
-.. Licensed to the Apache Software Foundation (ASF) under one
-.. or more contributor license agreements. See the NOTICE file
-.. distributed with this work for additional information
-.. regarding copyright ownership. The ASF licenses this file
-.. to you under the Apache License, Version 2.0 (the
-.. "License"); you may not use this file except in compliance
-.. with the License. You may obtain a copy of the License at
-.. http://www.apache.org/licenses/LICENSE-2.0
-.. Unless required by applicable law or agreed to in writing,
-.. software distributed under the License is distributed on an
-.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-.. KIND, either express or implied. See the License for the
-.. specific language governing permissions and limitations
-.. under the License.
-.. _api.expression:
-.. currentmodule:: datafusion
-.. autosummary::
- :toctree: ../generated/
- Expression
diff --git a/docs/source/python/api/functions.rst b/docs/source/python/api/functions.rst
deleted file mode 100644
index 6f10d826e38a..000000000000
--- a/docs/source/python/api/functions.rst
+++ /dev/null
@@ -1,27 +0,0 @@
-.. Licensed to the Apache Software Foundation (ASF) under one
-.. or more contributor license agreements. See the NOTICE file
-.. distributed with this work for additional information
-.. regarding copyright ownership. The ASF licenses this file
-.. to you under the Apache License, Version 2.0 (the
-.. "License"); you may not use this file except in compliance
-.. with the License. You may obtain a copy of the License at
-.. http://www.apache.org/licenses/LICENSE-2.0
-.. Unless required by applicable law or agreed to in writing,
-.. software distributed under the License is distributed on an
-.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-.. KIND, either express or implied. See the License for the
-.. specific language governing permissions and limitations
-.. under the License.
-.. _api.functions:
-.. currentmodule:: datafusion
-.. autosummary::
- :toctree: ../generated/
- functions
diff --git a/docs/source/python/index.rst b/docs/source/python/index.rst
deleted file mode 100644
index 167e66b9fe44..000000000000
--- a/docs/source/python/index.rst
+++ /dev/null
@@ -1,251 +0,0 @@
-.. Licensed to the Apache Software Foundation (ASF) under one
-.. or more contributor license agreements. See the NOTICE file
-.. distributed with this work for additional information
-.. regarding copyright ownership. The ASF licenses this file
-.. to you under the Apache License, Version 2.0 (the
-.. "License"); you may not use this file except in compliance
-.. with the License. You may obtain a copy of the License at
-.. http://www.apache.org/licenses/LICENSE-2.0
-.. Unless required by applicable law or agreed to in writing,
-.. software distributed under the License is distributed on an
-.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-.. KIND, either express or implied. See the License for the
-.. specific language governing permissions and limitations
-.. under the License.
-DataFusion in Python
-This is a Python library that binds to `Apache Arrow <https://arrow.apache.org/>`_ in-memory query engine `DataFusion <https://github.com/apache/arrow-datafusion>`_.
-Like pyspark, it allows you to build a plan through SQL or a DataFrame API against in-memory data, parquet or CSV files, run it in a multi-threaded environment, and obtain the result back in Python.
-It also allows you to use UDFs and UDAFs for complex operations.
-The major advantage of this library over other execution engines is that this library achieves zero-copy between Python and its execution engine: there is no cost in using UDFs, UDAFs, and collecting the results to Python apart from having to lock the GIL when running those operations.
-Its query engine, DataFusion, is written in `Rust <https://www.rust-lang.org/>`_, which makes strong assumptions about thread safety and lack of memory leaks.
-Technically, zero-copy is achieved via the `c data interface <https://arrow.apache.org/docs/format/CDataInterface.html>`_.
-How to use it
-Simple usage:
-.. code-block:: python
- import datafusion
- from datafusion import functions as f
- from datafusion import col
- import pyarrow
- # create a context
- ctx = datafusion.SessionContext()
- # create a RecordBatch and a new DataFrame from it
- batch = pyarrow.RecordBatch.from_arrays(
- [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])],
- names=["a", "b"],
- )
- df = ctx.create_dataframe([[batch]])
- # create a new statement
- df = df.select(
- col("a") + col("b"),
- col("a") - col("b"),
- )
- # execute and collect the first (and only) batch
- result = df.collect()[0]
- assert result.column(0) == pyarrow.array([5, 7, 9])
- assert result.column(1) == pyarrow.array([-3, -3, -3])
-We can also execute a query against data stored in CSV
-.. code-block:: bash
- echo "a,b\n1,4\n2,5\n3,6" > example.csv
-.. code-block:: python
- import datafusion
- from datafusion import functions as f
- from datafusion import col
- import pyarrow
- # create a context
- ctx = datafusion.SessionContext()
- # register a CSV
- ctx.register_csv('example', 'example.csv')
- # create a new statement
- df = ctx.table('example').select(
- col("a") + col("b"),
- col("a") - col("b"),
- )
- # execute and collect the first (and only) batch
- result = df.collect()[0]
- assert result.column(0) == pyarrow.array([5, 7, 9])
- assert result.column(1) == pyarrow.array([-3, -3, -3])
-And how to execute a query against a CSV using SQL:
-.. code-block:: python
- import datafusion
- from datafusion import functions as f
- from datafusion import col
- import pyarrow
- # create a context
- ctx = datafusion.SessionContext()
- # register a CSV
- ctx.register_csv('example', 'example.csv')
- # create a new statement via SQL
- df = ctx.sql("SELECT a+b, a-b FROM example")
- # execute and collect the first (and only) batch
- result = df.collect()[0]
- assert result.column(0) == pyarrow.array([5, 7, 9])
- assert result.column(1) == pyarrow.array([-3, -3, -3])
-.. code-block:: python
- def is_null(array: pyarrow.Array) -> pyarrow.Array:
- return array.is_null()
- udf = f.udf(is_null, [pyarrow.int64()], pyarrow.bool_())
- df = df.select(udf(col("a")))
-.. code-block:: python
- import pyarrow
- import pyarrow.compute
- class Accumulator:
- """
- Interface of a user-defined accumulation.
- """
- def __init__(self):
- self._sum = pyarrow.scalar(0.0)
- def to_scalars(self) -> [pyarrow.Scalar]:
- return [self._sum]
- def update(self, values: pyarrow.Array) -> None:
- # not nice since pyarrow scalars can't be summed yet. This breaks on `None`
- self._sum = pyarrow.scalar(self._sum.as_py() + pyarrow.compute.sum(values).as_py())
- def merge(self, states: pyarrow.Array) -> None:
- # not nice since pyarrow scalars can't be summed yet. This breaks on `None`
- self._sum = pyarrow.scalar(self._sum.as_py() + pyarrow.compute.sum(states).as_py())
- def evaluate(self) -> pyarrow.Scalar:
- return self._sum
- df = ...
- udaf = f.udaf(Accumulator, pyarrow.float64(), pyarrow.float64(), [pyarrow.float64()])
- df = df.aggregate(
- [],
- [udaf(col("a"))]
- )
-How to install (from pip)
-.. code-block:: shell
- pip install datafusion
-How to develop
-This assumes that you have rust and cargo installed. We use the workflow recommended by `pyo3 <https://github.com/PyO3/pyo3>`_ and `maturin <https://github.com/PyO3/maturin>`_.
-.. code-block:: shell
- # fetch this repo
- git clone git@github.com:apache/arrow-datafusion.git
- cd arrow-datafusion/python
- # prepare development environment (used to build wheel / install in development)
- python3 -m venv venv
- # activate the venv
- source venv/bin/activate
- pip install -r requirements.txt
-Whenever rust code changes (your changes or via `git pull`):
-.. code-block:: shell
- # make sure you activate the venv using "source venv/bin/activate" first
- maturin develop
- python -m pytest
-How to update dependencies
-To change test dependencies, change the `requirements.in` and run
-.. code-block:: shell
- # install pip-tools (this can be done only once), also consider running in venv
- pip install pip-tools
- # change requirements.in and then run
- pip-compile --generate-hashes
-To update dependencies, run
-.. code-block:: shell
- pip-compile update
-More details about pip-tools `here <https://github.com/jazzband/pip-tools>`_
-API reference
-.. toctree::
- :maxdepth: 2
- api
diff --git a/docs/source/user-guide/sql/datafusion-functions.md b/docs/source/user-guide/sql/datafusion-functions.md
deleted file mode 100644
index 651fe7576c78..000000000000
--- a/docs/source/user-guide/sql/datafusion-functions.md
+++ /dev/null
@@ -1,22 +0,0 @@
-# DataFusion Functions
-This content has moved to [scalar functions](scalar-functions.md)
diff --git a/docs/source/user-guide/sql/sql_status.md b/docs/source/user-guide/sql/sql_status.md
index 13a7e5e748aa..686ba73b70c2 100644
--- a/docs/source/user-guide/sql/sql_status.md
+++ b/docs/source/user-guide/sql/sql_status.md
@@ -83,12 +83,12 @@
- [ ] Basic date functions
- [ ] Basic time functions
- [x] Basic timestamp functions
- - [x] [to_timestamp](./datafusion-functions.md#to_timestamp)
- - [x] [to_timestamp_millis](./datafusion-functions.md#to_timestamp_millis)
- - [x] [to_timestamp_micros](./datafusion-functions.md#to_timestamp_micros)
- - [x] [to_timestamp_seconds](./datafusion-functions.md#to_timestamp_seconds)
- - [x] [extract](./datafusion-functions.md#extract)
- - [x] [date_part](./datafusion-functions.md#date_part)
+ - [x] [to_timestamp](./scalar_functions.md#to_timestamp)
+ - [x] [to_timestamp_millis](./scalar_functions.md#to_timestamp_millis)
+ - [x] [to_timestamp_micros](./scalar_functions.md#to_timestamp_micros)
+ - [x] [to_timestamp_seconds](./scalar_functions.md#to_timestamp_seconds)
+ - [x] [extract](./scalar_functions.md#extract)
+ - [x] [date_part](./scalar_functions.md#date_part)
- nested functions
- [x] Array of columns
- [x] Schema Queries